xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2e71c61d18e71e694bfbe2b314256bf3aa85754d)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 #include <petscsf.h>
5 
6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
18 {
19   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       i,*idxb = 0;
22   PetscScalar    *va,*vb;
23   Vec            vtmp;
24 
25   PetscFunctionBegin;
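  /* take the row-wise max over the diagonal block a->A first, then compare
     against the off-diagonal block a->B below, mapping off-diagonal block
     columns back to global point indices through a->garray */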
26   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
27   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
28   if (idx) {
29     for (i=0; i<A->rmap->n; i++) {
30       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
31     }
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++) {
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
41       va[i] = vb[i];
42       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
43     }
44   }
45 
46   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
47   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
48   ierr = PetscFree(idxb);CHKERRQ(ierr);
49   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 /*
80      Local utility routine that creates a mapping from a global column
81    number to its local number in the off-diagonal part (B) of the local
82    storage of the matrix.  This is done in a non-scalable way, since the
83    length of colmap equals the global number of block columns.  (A lookup sketch follows the routine below.)
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined(PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++) {
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
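
/*
   A minimal lookup sketch (illustrative only; gcol is a hypothetical global
   point column) of how the colmap built above is consulted; compare the
   corresponding logic in MatSetValues_MPIBAIJ() below.  Entries are stored
   shifted by one so that zero can mean "not present", hence col < 0 after the
   lookup means the block column is not (yet) in B; otherwise col + gcol%bs is
   the local point column in B.

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&col);CHKERRQ(ierr);
       col--;
     #else
       col = baij->colmap[gcol/bs] - 1;
     #endif
     if (col >= 0) col += gcol % bs;
*/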
108 
109 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110   { \
111  \
112     brow = row/bs;  \
113     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114     rmax = aimax[brow]; nrow = ailen[brow]; \
115     bcol = col/bs; \
116     ridx = row % bs; cidx = col % bs; \
117     low  = 0; high = nrow; \
118     while (high-low > 3) { \
119       t = (low+high)/2; \
120       if (rp[t] > bcol) high = t; \
121       else              low  = t; \
122     } \
123     for (_i=low; _i<high; _i++) { \
124       if (rp[_i] > bcol) break; \
125       if (rp[_i] == bcol) { \
126         bap = ap +  bs2*_i + bs*cidx + ridx; \
127         if (addv == ADD_VALUES) *bap += value;  \
128         else                    *bap  = value;  \
129         goto a_noinsert; \
130       } \
131     } \
132     if (a->nonew == 1) goto a_noinsert; \
133     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135     N = nrow++ - 1;  \
136     /* shift up all the later entries in this row */ \
137     for (ii=N; ii>=_i; ii--) { \
138       rp[ii+1] = rp[ii]; \
139       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
140     } \
141     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
142     rp[_i]                      = bcol;  \
143     ap[bs2*_i + bs*cidx + ridx] = value;  \
144 a_noinsert:; \
145     ailen[brow] = nrow; \
146   }
147 
148 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149   { \
150     brow = row/bs;  \
151     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152     rmax = bimax[brow]; nrow = bilen[brow]; \
153     bcol = col/bs; \
154     ridx = row % bs; cidx = col % bs; \
155     low  = 0; high = nrow; \
156     while (high-low > 3) { \
157       t = (low+high)/2; \
158       if (rp[t] > bcol) high = t; \
159       else              low  = t; \
160     } \
161     for (_i=low; _i<high; _i++) { \
162       if (rp[_i] > bcol) break; \
163       if (rp[_i] == bcol) { \
164         bap = ap +  bs2*_i + bs*cidx + ridx; \
165         if (addv == ADD_VALUES) *bap += value;  \
166         else                    *bap  = value;  \
167         goto b_noinsert; \
168       } \
169     } \
170     if (b->nonew == 1) goto b_noinsert; \
171     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173     N = nrow++ - 1;  \
174     /* shift up all the later entries in this row */ \
175     for (ii=N; ii>=_i; ii--) { \
176       rp[ii+1] = rp[ii]; \
177       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
178     } \
179     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
180     rp[_i]                      = bcol;  \
181     ap[bs2*_i + bs*cidx + ridx] = value;  \
182 b_noinsert:; \
183     bilen[brow] = nrow; \
184   }
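
/*
   Offset arithmetic shared by the two macros above: each bs x bs block is
   stored column-major, so within block _i the (ridx,cidx) entry lives at
   ap[bs2*_i + bs*cidx + ridx].  A worked example, for orientation: with
   bs = 3 (bs2 = 9), _i = 4, ridx = 1, cidx = 2, the entry is at
   ap[9*4 + 3*2 + 1] = ap[43].
*/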
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIBAIJ"
188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
191   MatScalar      value;
192   PetscBool      roworiented = baij->roworiented;
193   PetscErrorCode ierr;
194   PetscInt       i,j,row,col;
195   PetscInt       rstart_orig=mat->rmap->rstart;
196   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
198 
199   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros above */
200   Mat         A     = baij->A;
201   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
202   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203   MatScalar   *aa   =a->a;
204 
205   Mat         B     = baij->B;
206   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
207   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208   MatScalar   *ba   =b->a;
209 
210   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
211   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
212   MatScalar *ap,*bap;
213 
214   PetscFunctionBegin;
215   if (v) PetscValidScalarPointer(v,6);
216   for (i=0; i<m; i++) {
217     if (im[i] < 0) continue;
218 #if defined(PETSC_USE_DEBUG)
219     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
220 #endif
221     if (im[i] >= rstart_orig && im[i] < rend_orig) {
222       row = im[i] - rstart_orig;
223       for (j=0; j<n; j++) {
224         if (in[j] >= cstart_orig && in[j] < cend_orig) {
225           col = in[j] - cstart_orig;
226           if (roworiented) value = v[i*n+j];
227           else             value = v[i+j*m];
228           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         } else if (in[j] < 0) continue;
231 #if defined(PETSC_USE_DEBUG)
232         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!baij->colmap) {
237               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238             }
239 #if defined(PETSC_USE_CTABLE)
240             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
241             col  = col - 1;
242 #else
243             col = baij->colmap[in[j]/bs] - 1;
244 #endif
245             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
246               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
247               col  =  in[j];
248               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
249               B    = baij->B;
250               b    = (Mat_SeqBAIJ*)(B)->data;
251               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
252               ba   =b->a;
253             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
254             else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j];
257           else             value = v[i+j*m];
258           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
259           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
260         }
261       }
262     } else {
263       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
264       if (!baij->donotstash) {
265         mat->assembled = PETSC_FALSE;
266         if (roworiented) {
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
268         } else {
269           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
270         }
271       }
272     }
273   }
274   PetscFunctionReturn(0);
275 }
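
/*
   Caller-side sketch (illustrative only; grow and gcol are hypothetical global
   point indices): locally owned rows are inserted immediately into baij->A or
   baij->B above, while off-process rows are stashed and only communicated
   during assembly.

     PetscInt    grow,gcol;
     PetscScalar val = 1.0;
     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/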
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
279 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
280 {
281   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
282   const PetscScalar *value;
283   MatScalar         *barray     = baij->barray;
284   PetscBool         roworiented = baij->roworiented;
285   PetscErrorCode    ierr;
286   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
287   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
288   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
289 
290   PetscFunctionBegin;
291   if (!barray) {
292     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
293     baij->barray = barray;
294   }
295 
296   if (roworiented) stepval = (n-1)*bs;
297   else stepval = (m-1)*bs;
298 
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If there is a single block column (row-oriented) or a single block row (column-oriented), a copy is not required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if ((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + (i*(stepval+bs) + j)*bs;
315           } else {
316             value = v + (j*(stepval+bs) + i)*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
320             barray += bs;
321           }
322           barray -= bs2;
323         }
324 
325         if (in[j] >= cstart && in[j] < cend) {
326           col  = in[j] - cstart;
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         } else if (in[j] < 0) continue;
329 #if defined(PETSC_USE_DEBUG)
330         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331 #endif
332         else {
333           if (mat->was_assembled) {
334             if (!baij->colmap) {
335               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
336             }
337 
338 #if defined(PETSC_USE_DEBUG)
339 #if defined(PETSC_USE_CTABLE)
340             { PetscInt data;
341               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
342               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343             }
344 #else
345             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346 #endif
347 #endif
348 #if defined(PETSC_USE_CTABLE)
349             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
350             col  = (col - 1)/bs;
351 #else
352             col = (baij->colmap[in[j]] - 1)/bs;
353 #endif
354             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
355               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
356               col  =  in[j];
357             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
358           } else col = in[j];
359           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
360         }
361       }
362     } else {
363       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
364       if (!baij->donotstash) {
365         if (roworiented) {
366           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
367         } else {
368           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         }
370       }
371     }
372   }
373   PetscFunctionReturn(0);
374 }
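
/*
   Caller-side sketch (illustrative only; brow and bcol are hypothetical global
   BLOCK indices, assuming bs = 2): v carries one bs x bs block per (row,col)
   pair, laid out row-oriented here.

     PetscInt    brow,bcol;
     PetscScalar block[4] = {1.0,2.0,
                             3.0,4.0};
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
*/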
375 
376 #define HASH_KEY 0.6180339887
377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
378 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
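/*
   HASH() is multiplicative hashing with the golden-ratio constant.  A worked
   example, for orientation: with size = 100 and key = 17, tmp = 17*0.6180339887
   = 10.5066..., its fractional part is 0.5066..., and the slot is
   (PetscInt)(100*0.5066...) = 50.  Collisions are resolved by linear probing
   (with wraparound) in the routines below.
*/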
380 #undef __FUNCT__
381 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
382 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
383 {
384   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
385   PetscBool      roworiented = baij->roworiented;
386   PetscErrorCode ierr;
387   PetscInt       i,j,row,col;
388   PetscInt       rstart_orig=mat->rmap->rstart;
389   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
390   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
391   PetscReal      tmp;
392   MatScalar      **HD = baij->hd,value;
393 #if defined(PETSC_USE_DEBUG)
394   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
395 #endif
396 
397   PetscFunctionBegin;
398   if (v) PetscValidScalarPointer(v,6);
399   for (i=0; i<m; i++) {
400 #if defined(PETSC_USE_DEBUG)
401     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
402     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
403 #endif
404     row = im[i];
405     if (row >= rstart_orig && row < rend_orig) {
406       for (j=0; j<n; j++) {
407         col = in[j];
408         if (roworiented) value = v[i*n+j];
409         else             value = v[i+j*m];
410         /* Look up the entry in the hash table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
423             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
424           }
425         }
426 #else
427         if (HT[idx] != key) {
428           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
429           if (idx == size) {
430             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
431             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
432           }
433         }
434 #endif
435         /* A hash table entry was found, so insert the value at the correct address */
436         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
437         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
438       }
439     } else if (!baij->donotstash) {
440       if (roworiented) {
441         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
442       } else {
443         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
444       }
445     }
446   }
447 #if defined(PETSC_USE_DEBUG)
448   baij->ht_total_ct  = total_ct;
449   baij->ht_insert_ct = insert_ct;
450 #endif
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
456 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
457 {
458   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
459   PetscBool         roworiented = baij->roworiented;
460   PetscErrorCode    ierr;
461   PetscInt          i,j,ii,jj,row,col;
462   PetscInt          rstart=baij->rstartbs;
463   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
464   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
465   PetscReal         tmp;
466   MatScalar         **HD = baij->hd,*baij_a;
467   const PetscScalar *v_t,*value;
468 #if defined(PETSC_USE_DEBUG)
469   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
470 #endif
471 
472   PetscFunctionBegin;
473   if (roworiented) stepval = (n-1)*bs;
474   else stepval = (m-1)*bs;
475 
476   for (i=0; i<m; i++) {
477 #if defined(PETSC_USE_DEBUG)
478     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
479     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
480 #endif
481     row = im[i];
482     v_t = v + i*nbs2;
483     if (row >= rstart && row < rend) {
484       for (j=0; j<n; j++) {
485         col = in[j];
486 
487         /* Look up the entry in the hash table */
488         key = row*Nbs+col+1;
489         h1  = HASH(size,key,tmp);
490 
491         idx = h1;
492 #if defined(PETSC_USE_DEBUG)
493         total_ct++;
494         insert_ct++;
495         if (HT[idx] != key) {
496           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
497           if (idx == size) {
498             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
499             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
500           }
501         }
502 #else
503         if (HT[idx] != key) {
504           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
505           if (idx == size) {
506             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
507             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
508           }
509         }
510 #endif
511         baij_a = HD[idx];
512         if (roworiented) {
513           /*value = v + i*(stepval+bs)*bs + j*bs;*/
514           /* value = v + (i*(stepval+bs)+j)*bs; */
515           value = v_t;
516           v_t  += bs;
517           if (addv == ADD_VALUES) {
518             for (ii=0; ii<bs; ii++,value+=stepval) {
519               for (jj=ii; jj<bs2; jj+=bs) {
520                 baij_a[jj] += *value++;
521               }
522             }
523           } else {
524             for (ii=0; ii<bs; ii++,value+=stepval) {
525               for (jj=ii; jj<bs2; jj+=bs) {
526                 baij_a[jj] = *value++;
527               }
528             }
529           }
530         } else {
531           value = v + j*(stepval+bs)*bs + i*bs;
532           if (addv == ADD_VALUES) {
533             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
534               for (jj=0; jj<bs; jj++) {
535                 baij_a[jj] += *value++;
536               }
537             }
538           } else {
539             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
540               for (jj=0; jj<bs; jj++) {
541                 baij_a[jj] = *value++;
542               }
543             }
544           }
545         }
546       }
547     } else {
548       if (!baij->donotstash) {
549         if (roworiented) {
550           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557 #if defined(PETSC_USE_DEBUG)
558   baij->ht_total_ct  = total_ct;
559   baij->ht_insert_ct = insert_ct;
560 #endif
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatGetValues_MPIBAIJ"
566 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
567 {
568   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
569   PetscErrorCode ierr;
570   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
571   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
572 
573   PetscFunctionBegin;
574   for (i=0; i<m; i++) {
575     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
576     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
577     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
578       row = idxm[i] - bsrstart;
579       for (j=0; j<n; j++) {
580         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
581         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
582         if (idxn[j] >= bscstart && idxn[j] < bscend) {
583           col  = idxn[j] - bscstart;
584           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
585         } else {
586           if (!baij->colmap) {
587             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
588           }
589 #if defined(PETSC_USE_CTABLE)
590           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
591           data--;
592 #else
593           data = baij->colmap[idxn[j]/bs]-1;
594 #endif
595           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
596           else {
597             col  = data + idxn[j]%bs;
598             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599           }
600         }
601       }
602     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
603   }
604   PetscFunctionReturn(0);
605 }
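
/*
   Caller-side sketch (illustrative only; row and col are hypothetical, locally
   owned global point indices): only locally owned rows may be queried, per the
   error check above.

     PetscInt    row,col;
     PetscScalar val;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/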
606 
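/*
   For reference, the norms computed below are
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2)   local partial sums, reduced with MPIU_SUM
     NORM_1:         max_j sum_i |a_ij|      column sums, reduced with MPIU_SUM then maximized
     NORM_INFINITY:  max_i sum_j |a_ij|      local row sums, reduced with MPIU_MAX
*/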
607 #undef __FUNCT__
608 #define __FUNCT__ "MatNorm_MPIBAIJ"
609 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
610 {
611   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
612   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
613   PetscErrorCode ierr;
614   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
615   PetscReal      sum = 0.0;
616   MatScalar      *v;
617 
618   PetscFunctionBegin;
619   if (baij->size == 1) {
620     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
621   } else {
622     if (type == NORM_FROBENIUS) {
623       v  = amat->a;
624       nz = amat->nz*bs2;
625       for (i=0; i<nz; i++) {
626         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
627       }
628       v  = bmat->a;
629       nz = bmat->nz*bs2;
630       for (i=0; i<nz; i++) {
631         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
632       }
633       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
634       *nrm = PetscSqrtReal(*nrm);
635     } else if (type == NORM_1) { /* max column sum */
636       PetscReal *tmp,*tmp2;
637       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
638       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
639       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
640       v    = amat->a; jj = amat->j;
641       for (i=0; i<amat->nz; i++) {
642         for (j=0; j<bs; j++) {
643           col = bs*(cstart + *jj) + j; /* column index */
644           for (row=0; row<bs; row++) {
645             tmp[col] += PetscAbsScalar(*v);  v++;
646           }
647         }
648         jj++;
649       }
650       v = bmat->a; jj = bmat->j;
651       for (i=0; i<bmat->nz; i++) {
652         for (j=0; j<bs; j++) {
653           col = bs*garray[*jj] + j;
654           for (row=0; row<bs; row++) {
655             tmp[col] += PetscAbsScalar(*v); v++;
656           }
657         }
658         jj++;
659       }
660       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
661       *nrm = 0.0;
662       for (j=0; j<mat->cmap->N; j++) {
663         if (tmp2[j] > *nrm) *nrm = tmp2[j];
664       }
665       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
666     } else if (type == NORM_INFINITY) { /* max row sum */
667       PetscReal *sums;
668       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
669       sum  = 0.0;
670       for (j=0; j<amat->mbs; j++) {
671         for (row=0; row<bs; row++) sums[row] = 0.0;
672         v  = amat->a + bs2*amat->i[j];
673         nz = amat->i[j+1]-amat->i[j];
674         for (i=0; i<nz; i++) {
675           for (col=0; col<bs; col++) {
676             for (row=0; row<bs; row++) {
677               sums[row] += PetscAbsScalar(*v); v++;
678             }
679           }
680         }
681         v  = bmat->a + bs2*bmat->i[j];
682         nz = bmat->i[j+1]-bmat->i[j];
683         for (i=0; i<nz; i++) {
684           for (col=0; col<bs; col++) {
685             for (row=0; row<bs; row++) {
686               sums[row] += PetscAbsScalar(*v); v++;
687             }
688           }
689         }
690         for (row=0; row<bs; row++) {
691           if (sums[row] > sum) sum = sums[row];
692         }
693       }
694       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
695       ierr = PetscFree(sums);CHKERRQ(ierr);
696     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
697   }
698   PetscFunctionReturn(0);
699 }
700 
701 /*
702   Creates and populates the hash table.
703   This table is created only once.
704   If new entries need to be added to the matrix
705   then the hash table has to be destroyed and
706   recreated.
707 */
708 #undef __FUNCT__
709 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
710 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
711 {
712   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
713   Mat            A     = baij->A,B=baij->B;
714   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
715   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
716   PetscErrorCode ierr;
717   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
718   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
719   PetscInt       *HT,key;
720   MatScalar      **HD;
721   PetscReal      tmp;
722 #if defined(PETSC_USE_INFO)
723   PetscInt ct=0,max=0;
724 #endif
725 
726   PetscFunctionBegin;
727   if (baij->ht) PetscFunctionReturn(0);
728 
729   baij->ht_size = (PetscInt)(factor*nz);
730   ht_size       = baij->ht_size;
731 
732   /* Allocate Memory for Hash Table */
733   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
734   HD   = baij->hd;
735   HT   = baij->ht;
736 
737   /* Loop Over A */
738   for (i=0; i<a->mbs; i++) {
739     for (j=ai[i]; j<ai[i+1]; j++) {
740       row = i+rstart;
741       col = aj[j]+cstart;
742 
743       key = row*Nbs + col + 1;
744       h1  = HASH(ht_size,key,tmp);
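      /* linear probing with wraparound: take the first empty slot at or after h1 */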
745       for (k=0; k<ht_size; k++) {
746         if (!HT[(h1+k)%ht_size]) {
747           HT[(h1+k)%ht_size] = key;
748           HD[(h1+k)%ht_size] = a->a + j*bs2;
749           break;
750 #if defined(PETSC_USE_INFO)
751         } else {
752           ct++;
753 #endif
754         }
755       }
756 #if defined(PETSC_USE_INFO)
757       if (k> max) max = k;
758 #endif
759     }
760   }
761   /* Loop Over B */
762   for (i=0; i<b->mbs; i++) {
763     for (j=bi[i]; j<bi[i+1]; j++) {
764       row = i+rstart;
765       col = garray[bj[j]];
766       key = row*Nbs + col + 1;
767       h1  = HASH(ht_size,key,tmp);
768       for (k=0; k<ht_size; k++) {
769         if (!HT[(h1+k)%ht_size]) {
770           HT[(h1+k)%ht_size] = key;
771           HD[(h1+k)%ht_size] = b->a + j*bs2;
772           break;
773 #if defined(PETSC_USE_INFO)
774         } else {
775           ct++;
776 #endif
777         }
778       }
779 #if defined(PETSC_USE_INFO)
780       if (k> max) max = k;
781 #endif
782     }
783   }
784 
785   /* Print Summary */
786 #if defined(PETSC_USE_INFO)
787   for (i=0,j=0; i<ht_size; i++) {
788     if (HT[i]) j++;
789   }
790   ierr = PetscInfo2(mat,"Average search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
791 #endif
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
797 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802   InsertMode     addv;
803 
804   PetscFunctionBegin;
805   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
806 
807   /* make sure all processors are in the same InsertMode; the bitwise OR below equals (ADD_VALUES|INSERT_VALUES) exactly when some processors inserted while others added */
808   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
809   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
810   mat->insertmode = addv; /* in case this processor had no cache */
811 
812   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
813   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
815   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
816   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
817   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
818   PetscFunctionReturn(0);
819 }
820 
821 #undef __FUNCT__
822 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
823 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
824 {
825   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
826   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
827   PetscErrorCode ierr;
828   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
829   PetscInt       *row,*col;
830   PetscBool      r1,r2,r3,other_disassembled;
831   MatScalar      *val;
832   InsertMode     addv = mat->insertmode;
833   PetscMPIInt    n;
834 
835   PetscFunctionBegin;
836   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
837   if (!baij->donotstash && !mat->nooffprocentries) {
838     while (1) {
839       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
840       if (!flg) break;
841 
842       for (i=0; i<n;) {
843         /* Now identify the consecutive values belonging to the same row */
844         for (j=i,rstart=row[j]; j<n; j++) {
845           if (row[j] != rstart) break;
846         }
847         if (j < n) ncols = j-i;
848         else       ncols = n-i;
849         /* Now assemble all these values with a single function call */
850         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
851         i    = j;
852       }
853     }
854     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
855     /* Now process the block-stash. Since the values are stashed column-oriented,
856        the roworiented flags are temporarily cleared here and the originals
857        restored after the MatSetValuesBlocked() calls */
858     r1 = baij->roworiented;
859     r2 = a->roworiented;
860     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
861 
862     baij->roworiented = PETSC_FALSE;
863     a->roworiented    = PETSC_FALSE;
864 
865     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
866     while (1) {
867       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
868       if (!flg) break;
869 
870       for (i=0; i<n;) {
871         /* Now identify the consecutive values belonging to the same row */
872         for (j=i,rstart=row[j]; j<n; j++) {
873           if (row[j] != rstart) break;
874         }
875         if (j < n) ncols = j-i;
876         else       ncols = n-i;
877         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
878         i    = j;
879       }
880     }
881     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
882 
883     baij->roworiented = r1;
884     a->roworiented    = r2;
885 
886     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
887   }
888 
889   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
890   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
891 
892   /* determine if any processor has disassembled, if so we must
893      also disassemble ourselves, in order that we may reassemble. */
894   /*
895      if the nonzero structure of submatrix B cannot change then we know that
896      no processor disassembled, so we can skip this step
897   */
898   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
899     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
900     if (mat->was_assembled && !other_disassembled) {
901       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
902     }
903   }
904 
905   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
906     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
907   }
908   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
909   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
910 
911 #if defined(PETSC_USE_INFO)
912   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
913     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",baij->ht_insert_ct ? ((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct : 0.0);CHKERRQ(ierr);
914 
915     baij->ht_total_ct  = 0;
916     baij->ht_insert_ct = 0;
917   }
918 #endif
919   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
920     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
921 
922     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
923     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
924   }
925 
926   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
927 
928   baij->rowvalues = 0;
929   PetscFunctionReturn(0);
930 }
931 
932 #include <petscdraw.h>
933 #undef __FUNCT__
934 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
935 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
936 {
937   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
938   PetscErrorCode    ierr;
939   PetscMPIInt       size = baij->size,rank = baij->rank;
940   PetscInt          bs   = mat->rmap->bs;
941   PetscBool         iascii,isdraw;
942   PetscViewer       sviewer;
943   PetscViewerFormat format;
944 
945   PetscFunctionBegin;
946   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
947   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
948   if (iascii) {
949     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
950     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
951       MatInfo info;
952       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
953       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
954       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
955       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
956                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
957       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
958       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
959       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
961       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
962       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
963       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
964       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
965       PetscFunctionReturn(0);
966     } else if (format == PETSC_VIEWER_ASCII_INFO) {
967       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
968       PetscFunctionReturn(0);
969     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
970       PetscFunctionReturn(0);
971     }
972   }
973 
974   if (isdraw) {
975     PetscDraw draw;
976     PetscBool isnull;
977     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
978     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
979   }
980 
981   if (size == 1) {
982     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
983     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
984   } else {
985     /* assemble the entire matrix onto first processor. */
986     Mat         A;
987     Mat_SeqBAIJ *Aloc;
988     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
989     MatScalar   *a;
990 
991     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
992     /* Perhaps this should be the type of mat? */
993     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
994     if (!rank) {
995       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
996     } else {
997       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
998     }
999     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1000     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1001     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1002     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1003 
1004     /* copy over the A part */
1005     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1006     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1007     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1008 
1009     for (i=0; i<mbs; i++) {
1010       rvals[0] = bs*(baij->rstartbs + i);
1011       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1012       for (j=ai[i]; j<ai[i+1]; j++) {
1013         col = (baij->cstartbs+aj[j])*bs;
1014         for (k=0; k<bs; k++) {
1015           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1016           col++; a += bs;
1017         }
1018       }
1019     }
1020     /* copy over the B part */
1021     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     for (i=0; i<mbs; i++) {
1024       rvals[0] = bs*(baij->rstartbs + i);
1025       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1026       for (j=ai[i]; j<ai[i+1]; j++) {
1027         col = baij->garray[aj[j]]*bs;
1028         for (k=0; k<bs; k++) {
1029           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1030           col++; a += bs;
1031         }
1032       }
1033     }
1034     ierr = PetscFree(rvals);CHKERRQ(ierr);
1035     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1036     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1037     /*
1038        Everyone has to call to draw the matrix since the graphics waits are
1039        synchronized across all processors that share the PetscDraw object
1040     */
1041     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1042     if (!rank) {
1043       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1044       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1045       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1046       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1047     }
1048     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1049     ierr = MatDestroy(&A);CHKERRQ(ierr);
1050   }
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1056 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1057 {
1058   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1059   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1060   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1061   PetscErrorCode ierr;
1062   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1063   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1064   int            fd;
1065   PetscScalar    *column_values;
1066   FILE           *file;
1067   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1068   PetscInt       message_count,flowcontrolcount;
1069 
1070   PetscFunctionBegin;
1071   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1072   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1073   nz   = bs2*(A->nz + B->nz);
1074   rlen = mat->rmap->n;
1075   if (!rank) {
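    /* binary header layout: [MAT_FILE_CLASSID, global rows, global columns, global number of nonzeros] */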
1076     header[0] = MAT_FILE_CLASSID;
1077     header[1] = mat->rmap->N;
1078     header[2] = mat->cmap->N;
1079 
1080     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1081     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1082     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1083     /* get largest number of rows any processor has */
1084     range = mat->rmap->range;
1085     for (i=1; i<size; i++) {
1086       rlen = PetscMax(rlen,range[i+1] - range[i]);
1087     }
1088   } else {
1089     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1090   }
1091 
1092   ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
1093   /* compute lengths of each row  */
1094   for (i=0; i<a->mbs; i++) {
1095     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1096   }
1097   /* store the row lengths to the file */
1098   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1099   if (!rank) {
1100     MPI_Status status;
1101     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1102     rlen = (range[1] - range[0])/bs;
1103     for (i=0; i<rlen; i++) {
1104       for (j=0; j<bs; j++) {
1105         row_lens[i*bs+j] = bs*crow_lens[i];
1106       }
1107     }
1108     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1109     for (i=1; i<size; i++) {
1110       rlen = (range[i+1] - range[i])/bs;
1111       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1112       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1113       for (k=0; k<rlen; k++) {
1114         for (j=0; j<bs; j++) {
1115           row_lens[k*bs+j] = bs*crow_lens[k];
1116         }
1117       }
1118       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1119     }
1120     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1121     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1122   } else {
1123     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1124     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1125     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1126   }
1127   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1128 
1129   /* load up the local column indices. Include indices for every row, not just one per block row, since process 0 does not have the
1130      information needed to expand a block row into its individual rows. This does require more communication, but still no more than
1131      the communication needed for the nonzero values  */
1132   nzmax = nz; /* space the largest processor needs */
1133   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1134   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1135   cnt   = 0;
1136   for (i=0; i<a->mbs; i++) {
1137     pcnt = cnt;
1138     for (j=B->i[i]; j<B->i[i+1]; j++) {
1139       if ((col = garray[B->j[j]]) > cstart) break;
1140       for (l=0; l<bs; l++) {
1141         column_indices[cnt++] = bs*col+l;
1142       }
1143     }
1144     for (k=A->i[i]; k<A->i[i+1]; k++) {
1145       for (l=0; l<bs; l++) {
1146         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1147       }
1148     }
1149     for (; j<B->i[i+1]; j++) {
1150       for (l=0; l<bs; l++) {
1151         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1152       }
1153     }
1154     len = cnt - pcnt;
1155     for (k=1; k<bs; k++) {
1156       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1157       cnt += len;
1158     }
1159   }
1160   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1161 
1162   /* store the columns to the file */
1163   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1164   if (!rank) {
1165     MPI_Status status;
1166     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1167     for (i=1; i<size; i++) {
1168       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1169       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1170       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1171       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1172     }
1173     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1174   } else {
1175     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1176     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1177     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1178     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1179   }
1180   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1181 
1182   /* load up the numerical values */
1183   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1184   cnt  = 0;
1185   for (i=0; i<a->mbs; i++) {
1186     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1187     for (j=B->i[i]; j<B->i[i+1]; j++) {
1188       if (garray[B->j[j]] > cstart) break;
1189       for (l=0; l<bs; l++) {
1190         for (ll=0; ll<bs; ll++) {
1191           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1192         }
1193       }
1194       cnt += bs;
1195     }
1196     for (k=A->i[i]; k<A->i[i+1]; k++) {
1197       for (l=0; l<bs; l++) {
1198         for (ll=0; ll<bs; ll++) {
1199           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1200         }
1201       }
1202       cnt += bs;
1203     }
1204     for (; j<B->i[i+1]; j++) {
1205       for (l=0; l<bs; l++) {
1206         for (ll=0; ll<bs; ll++) {
1207           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1208         }
1209       }
1210       cnt += bs;
1211     }
1212     cnt += (bs-1)*rlen;
1213   }
1214   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1215 
1216   /* store the column values to the file */
1217   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1218   if (!rank) {
1219     MPI_Status status;
1220     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1221     for (i=1; i<size; i++) {
1222       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1223       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1224       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1225       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1226     }
1227     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1228   } else {
1229     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1230     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(column_values);CHKERRQ(ierr);
1235 
1236   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1237   if (file) {
1238     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1239   }
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatView_MPIBAIJ"
1245 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1246 {
1247   PetscErrorCode ierr;
1248   PetscBool      iascii,isdraw,issocket,isbinary;
1249 
1250   PetscFunctionBegin;
1251   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1255   if (iascii || isdraw || issocket) {
1256     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1257   } else if (isbinary) {
1258     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1265 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1266 {
1267   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1268   PetscErrorCode ierr;
1269 
1270   PetscFunctionBegin;
1271 #if defined(PETSC_USE_LOG)
1272   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1273 #endif
1274   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1275   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1276   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1277   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1278 #if defined(PETSC_USE_CTABLE)
1279   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1280 #else
1281   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1282 #endif
1283   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1284   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1285   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1286   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1287   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1288   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1289   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1290   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1291 
1292   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatMult_MPIBAIJ"
1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1308 {
1309   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       nt;
1312 
1313   PetscFunctionBegin;
1314   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1315   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1316   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1317   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1318   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1320   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
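/*
   Note on the structure above: MatMult_MPIBAIJ() overlaps communication with
   computation.  The scatter of the ghost values of xx into a->lvec is started,
   the purely local product with the diagonal block a->A proceeds while the
   messages are in flight, and only then is the scatter completed and the
   off-diagonal contribution of a->B added in.  A minimal caller-side sketch
   (hypothetical driver code; all routines are standard PETSc API):

      Vec x,y;
      ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
      ierr = VecSet(x,1.0);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);     dispatches here for MATMPIBAIJ
      ierr = VecDestroy(&x);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);
*/
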
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1328 {
1329   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1335   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1342 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1343 {
1344   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1345   PetscErrorCode ierr;
1346   PetscBool      merged;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1350   /* do nondiagonal part */
1351   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1352   if (!merged) {
1353     /* send it on its way */
1354     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1355     /* do local part */
1356     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1357     /* receive remote parts: note this assumes the values are not actually */
1358     /* inserted in yy until the next line */
1359     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1360   } else {
1361     /* do local part */
1362     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1363     /* send it on its way */
1364     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1365     /* the values were actually received in the Begin(), but End() must still be called to complete the scatter */
1366     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367   }
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374 {
1375   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   /* do nondiagonal part */
1380   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1381   /* send it on its way */
1382   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   /* do local part */
1384   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1385   /* receive remote parts: note this assumes the values are not actually */
1386   /* inserted in zz until VecScatterEnd() below, which is true for this */
1387   /* implementation but is not guaranteed for every scatter implementation */
1388   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 /*
1393   This works correctly only for square matrices where the subblock a->A is the
1394    diagonal block
1395 */
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where a->A is the diagonal block");
1405   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatScale_MPIBAIJ"
1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1412 {
1413   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1414   PetscErrorCode ierr;
1415 
1416   PetscFunctionBegin;
1417   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1418   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1425 {
1426   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1427   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1428   PetscErrorCode ierr;
1429   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1430   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1431   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1432 
1433   PetscFunctionBegin;
1434   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1435   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1436   mat->getrowactive = PETSC_TRUE;
1437 
1438   if (!mat->rowvalues && (idx || v)) {
1439     /*
1440         allocate enough space to hold information from the longest row.
1441     */
1442     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1443     PetscInt    max = 1,mbs = mat->mbs,tmp;
1444     for (i=0; i<mbs; i++) {
1445       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1446       if (max < tmp) max = tmp;
1447     }
1448     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1449   }
1450   lrow = row - brstart;
1451 
1452   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1453   if (!v)   {pvA = 0; pvB = 0;}
1454   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1455   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1456   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1457   nztot = nzA + nzB;
1458 
1459   cmap = mat->garray;
1460   if (v  || idx) {
1461     if (nztot) {
1462       /* Sort by increasing column numbers, assuming A and B already sorted */
1463       PetscInt imark = -1;
1464       if (v) {
1465         *v = v_p = mat->rowvalues;
1466         for (i=0; i<nzB; i++) {
1467           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1468           else break;
1469         }
1470         imark = i;
1471         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1472         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1473       }
1474       if (idx) {
1475         *idx = idx_p = mat->rowindices;
1476         if (imark > -1) {
1477           for (i=0; i<imark; i++) {
1478             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479           }
1480         } else {
1481           for (i=0; i<nzB; i++) {
1482             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1483             else break;
1484           }
1485           imark = i;
1486         }
1487         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1488         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1489       }
1490     } else {
1491       if (idx) *idx = 0;
1492       if (v)   *v   = 0;
1493     }
1494   }
1495   *nz  = nztot;
1496   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1497   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
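/*
   A minimal sketch of the intended use of the row-access routines above
   (hypothetical caller loop; MatGetRow()/MatRestoreRow() are the public entry
   points and use global column numbering):

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... examine the ncols entries of cols[] and vals[] ...
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }

   Only one row may be active at a time, since a single workspace
   (mat->rowvalues/mat->rowindices) holds the merged row.
*/
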
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1503 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1504 {
1505   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1506 
1507   PetscFunctionBegin;
1508   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1509   baij->getrowactive = PETSC_FALSE;
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1515 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1516 {
1517   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1518   PetscErrorCode ierr;
1519 
1520   PetscFunctionBegin;
1521   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1522   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1528 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1529 {
1530   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1531   Mat            A  = a->A,B = a->B;
1532   PetscErrorCode ierr;
1533   PetscReal      isend[5],irecv[5];
1534 
1535   PetscFunctionBegin;
1536   info->block_size = (PetscReal)matin->rmap->bs;
1537 
1538   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1539 
1540   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1541   isend[3] = info->memory;  isend[4] = info->mallocs;
1542 
1543   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1544 
1545   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1546   isend[3] += info->memory;  isend[4] += info->mallocs;
1547 
1548   if (flag == MAT_LOCAL) {
1549     info->nz_used      = isend[0];
1550     info->nz_allocated = isend[1];
1551     info->nz_unneeded  = isend[2];
1552     info->memory       = isend[3];
1553     info->mallocs      = isend[4];
1554   } else if (flag == MAT_GLOBAL_MAX) {
1555     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1556 
1557     info->nz_used      = irecv[0];
1558     info->nz_allocated = irecv[1];
1559     info->nz_unneeded  = irecv[2];
1560     info->memory       = irecv[3];
1561     info->mallocs      = irecv[4];
1562   } else if (flag == MAT_GLOBAL_SUM) {
1563     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1564 
1565     info->nz_used      = irecv[0];
1566     info->nz_allocated = irecv[1];
1567     info->nz_unneeded  = irecv[2];
1568     info->memory       = irecv[3];
1569     info->mallocs      = irecv[4];
1570   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1571   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1572   info->fill_ratio_needed = 0;
1573   info->factor_mallocs    = 0;
1574   PetscFunctionReturn(0);
1575 }
1576 
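/*
   A minimal sketch of querying the statistics assembled above (hypothetical
   caller code; MatGetInfo() and the MatInfo struct are standard PETSc API):

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g, mallocs %g\n",
                         info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);

   MAT_LOCAL reports this process only; MAT_GLOBAL_MAX and MAT_GLOBAL_SUM
   reduce the five counters across the communicator, as coded above.
*/
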
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1579 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1580 {
1581   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1582   PetscErrorCode ierr;
1583 
1584   PetscFunctionBegin;
1585   switch (op) {
1586   case MAT_NEW_NONZERO_LOCATIONS:
1587   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1588   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1589   case MAT_KEEP_NONZERO_PATTERN:
1590   case MAT_NEW_NONZERO_LOCATION_ERR:
1591     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1592     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1593     break;
1594   case MAT_ROW_ORIENTED:
1595     a->roworiented = flg;
1596 
1597     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1598     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1599     break;
1600   case MAT_NEW_DIAGONALS:
1601     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1602     break;
1603   case MAT_IGNORE_OFF_PROC_ENTRIES:
1604     a->donotstash = flg;
1605     break;
1606   case MAT_USE_HASH_TABLE:
1607     a->ht_flag = flg;
1608     break;
1609   case MAT_SYMMETRIC:
1610   case MAT_STRUCTURALLY_SYMMETRIC:
1611   case MAT_HERMITIAN:
1612   case MAT_SYMMETRY_ETERNAL:
1613     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1614     break;
1615   default:
1616     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1617   }
1618   PetscFunctionReturn(0);
1619 }
1620 
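/*
   A minimal sketch of setting options handled above (hypothetical caller
   code), for example enabling the hash table used during assembly and
   discarding off-process entries:

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   Per-process options such as these are recorded in the Mat_MPIBAIJ context,
   while structural options are forwarded to the sequential blocks a->A and a->B.
*/
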
1621 #undef __FUNCT__
1622 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1623 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1624 {
1625   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1626   Mat_SeqBAIJ    *Aloc;
1627   Mat            B;
1628   PetscErrorCode ierr;
1629   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1630   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1631   MatScalar      *a;
1632 
1633   PetscFunctionBegin;
1634   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1635   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1636     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1637     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1638     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1639     /* Do not know preallocation information, but must set block size */
1640     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1641   } else {
1642     B = *matout;
1643   }
1644 
1645   /* copy over the A part */
1646   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1647   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1648   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1649 
1650   for (i=0; i<mbs; i++) {
1651     rvals[0] = bs*(baij->rstartbs + i);
1652     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1653     for (j=ai[i]; j<ai[i+1]; j++) {
1654       col = (baij->cstartbs+aj[j])*bs;
1655       for (k=0; k<bs; k++) {
1656         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1657 
1658         col++; a += bs;
1659       }
1660     }
1661   }
1662   /* copy over the B part */
1663   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1664   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1665   for (i=0; i<mbs; i++) {
1666     rvals[0] = bs*(baij->rstartbs + i);
1667     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1668     for (j=ai[i]; j<ai[i+1]; j++) {
1669       col = baij->garray[aj[j]]*bs;
1670       for (k=0; k<bs; k++) {
1671         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1672         col++;
1673         a += bs;
1674       }
1675     }
1676   }
1677   ierr = PetscFree(rvals);CHKERRQ(ierr);
1678   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1679   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1680 
1681   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1682   else {
1683     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1684   }
1685   PetscFunctionReturn(0);
1686 }
1687 
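/*
   A minimal sketch of both transpose modes handled above (hypothetical caller
   code; MatTranspose() is the public entry point):

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);    new matrix
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);       in-place, square only

   The in-place variant builds a temporary B and then replaces the guts of A
   via MatHeaderMerge(), which is why it is restricted to square matrices.
*/
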
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1690 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1691 {
1692   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1693   Mat            a     = baij->A,b = baij->B;
1694   PetscErrorCode ierr;
1695   PetscInt       s1,s2,s3;
1696 
1697   PetscFunctionBegin;
1698   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1699   if (rr) {
1700     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1701     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1702     /* Overlap communication with computation. */
1703     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1704   }
1705   if (ll) {
1706     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1707     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1708     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1709   }
1710   /* scale  the diagonal block */
1711   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1712 
1713   if (rr) {
1714     /* Do a scatter end and then right scale the off-diagonal block */
1715     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1716     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1717   }
1718   PetscFunctionReturn(0);
1719 }
1720 
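/*
   A minimal sketch of the scaling performed above (hypothetical caller code):
   MatDiagonalScale(A,l,r) computes A = diag(l) A diag(r), where l is
   distributed like the rows of A and r like the columns:

      Vec l,r;
      ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
      ierr = VecSet(l,2.0);CHKERRQ(ierr);
      ierr = VecSet(r,0.5);CHKERRQ(ierr);
      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);

   As in MatMult_MPIBAIJ(), the scatter of r into baij->lvec is overlapped
   with the left scaling and the scaling of the diagonal block.
*/
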
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1723 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1724 {
1725   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1726   PetscInt      *owners = A->rmap->range;
1727   PetscInt       n      = A->rmap->n;
1728   PetscMPIInt    size   = l->size;
1729   PetscSF        sf;
1730   PetscInt      *lrows;
1731   PetscSFNode   *rrows;
1732   PetscInt       lastidx = -1, r, p = 0, len = 0;
1733   PetscErrorCode ierr;
1734 
1735   PetscFunctionBegin;
1736   /* Create SF where leaves are input rows and roots are owned rows */
1737   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1738   for (r = 0; r < n; ++r) lrows[r] = -1;
1739   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1740   for (r = 0; r < N; ++r) {
1741     const PetscInt idx   = rows[r];
1742     PetscBool      found = PETSC_FALSE;
1743     /* Trick for efficient searching for sorted rows */
1744     if (lastidx > idx) p = 0;
1745     lastidx = idx;
1746     for (; p < size; ++p) {
1747       if (idx >= owners[p] && idx < owners[p+1]) {
1748         rrows[r].rank  = p;
1749         rrows[r].index = rows[r] - owners[p];
1750         found = PETSC_TRUE;
1751         break;
1752       }
1753     }
1754     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1755   }
1756   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1757   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1758   /* Collect flags for rows to be zeroed */
1759   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1760   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1761   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1762   /* Compress and put in row numbers */
1763   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1764   /* fix right hand side if needed */
1765   if (x && b) {
1766     const PetscScalar *xx;
1767     PetscScalar       *bb;
1768 
1769     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1770     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1771     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1772     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1773     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1774   }
1775 
1776   /* actually zap the local rows */
1777   /*
1778         Zero the required rows. If the "diagonal block" of the matrix
1779      is square and the user wishes to set the diagonal, we use separate
1780      code so that MatSetValues() is not called for each diagonal entry;
1781      that would allocate new memory on each call, triggering many mallocs
1782      and slowing things down.
1783   */
1784   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1785   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
1786   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1787     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
1788   } else if (diag != 0.0) {
1789     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1790     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1791        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1792     for (r = 0; r < len; ++r) {
1793       const PetscInt row = lrows[r] + A->rmap->rstart;
1794       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1795     }
1796     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1797     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1798   } else {
1799     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1800   }
1801   ierr = PetscFree(lrows);CHKERRQ(ierr);
1802   PetscFunctionReturn(0);
1803 }
1804 
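/*
   A minimal sketch of the public interface to the routine above (hypothetical
   row indices; rows are global and may be owned by any process, since the
   PetscSF reduction above routes them to their owners):

      PetscInt    rows[2] = {0,5};
      PetscScalar diag    = 1.0;
      ierr = MatZeroRows(A,2,rows,diag,x,b);CHKERRQ(ierr);

   When x and b are provided, b is set to diag*x in the zeroed rows so that
   the solution of the modified system retains the prescribed values,
   matching the fix-up of bb[] coded above.
*/
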
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1807 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1808 {
1809   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1810   PetscErrorCode    ierr;
1811   PetscMPIInt       size = l->size;
1812   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count,n = A->rmap->n,lastidx = -1;
1813   PetscInt          *lrows,*owners = A->rmap->range;
1814   PetscSFNode       *rrows;
1815   PetscSF           sf;
1816   const PetscScalar *xx;
1817   PetscScalar       *bb,*mask;
1818   Vec               xmask,lmask;
1819   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1820   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1821   PetscScalar       *aa;
1822 #if defined(PETSC_DEBUG)
1823   PetscBool found = PETSC_FALSE;
1824 #endif
1825 
1826   PetscFunctionBegin;
1827   /* Create SF where leaves are input rows and roots are owned rows */
1828   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1829   for (r = 0; r < n; ++r) lrows[r] = -1;
1830   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1831   for (r = 0; r < N; ++r) {
1832     const PetscInt idx   = rows[r];
1833     PetscBool      found = PETSC_FALSE;
1834     /* Trick for efficient searching for sorted rows */
1835     if (lastidx > idx) p = 0;
1836     lastidx = idx;
1837     for (; p < size; ++p) {
1838       if (idx >= owners[p] && idx < owners[p+1]) {
1839         rrows[r].rank  = p;
1840         rrows[r].index = rows[r] - owners[p];
1841         found = PETSC_TRUE;
1842         break;
1843       }
1844     }
1845     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1846   }
1847   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1848   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1849   /* Collect flags for rows to be zeroed */
1850   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1851   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1852   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1853   /* Compress and put in row numbers */
1854   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1855   /* zero diagonal part of matrix */
1856   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1857   /* handle off diagonal part of matrix */
1858   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1859   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1860   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1861   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1862   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1863   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1864   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1865   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1866   if (x) {
1867     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1868     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1869     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1870     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1871   }
1872   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1873   /* remove zeroed rows of off diagonal matrix */
1874   for (i = 0; i < len; ++i) {
1875     row   = lrows[i];
1876     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1877     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1878     for (k = 0; k < count; ++k) {
1879       aa[0] = 0.0;
1880       aa   += bs;
1881     }
1882   }
1883   /* loop over all elements of off process part of matrix zeroing removed columns*/
1884   for (i = 0; i < l->B->rmap->N; ++i) {
1885     row = i/bs;
1886     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1887       for (k = 0; k < bs; ++k) {
1888         col = bs*baij->j[j] + k;
1889         if (PetscAbsScalar(mask[col])) {
1890           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1891           if (b) bb[i] -= aa[0]*xx[col];
1892           aa[0] = 0.0;
1893         }
1894       }
1895     }
1896   }
1897   if (x) {
1898     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1899     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1900   }
1901   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1902   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1903   ierr = PetscFree(lrows);CHKERRQ(ierr);
1904   PetscFunctionReturn(0);
1905 }
1906 
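/*
   A minimal sketch for the symmetric variant above (hypothetical caller code,
   with nzero and rows[] supplied in global numbering): zeroing both rows and
   columns preserves symmetry, and the products of the eliminated columns with
   x are subtracted from b, as done in the off-diagonal loop above:

      ierr = MatZeroRowsColumns(A,nzero,rows,1.0,x,b);CHKERRQ(ierr);
*/
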
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1909 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1910 {
1911   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1912   PetscErrorCode ierr;
1913 
1914   PetscFunctionBegin;
1915   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatEqual_MPIBAIJ"
1923 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1924 {
1925   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1926   Mat            a,b,c,d;
1927   PetscBool      flg;
1928   PetscErrorCode ierr;
1929 
1930   PetscFunctionBegin;
1931   a = matA->A; b = matA->B;
1932   c = matB->A; d = matB->B;
1933 
1934   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1935   if (flg) {
1936     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1937   }
1938   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatCopy_MPIBAIJ"
1944 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1945 {
1946   PetscErrorCode ierr;
1947   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1948   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1949 
1950   PetscFunctionBegin;
1951   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1952   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1953     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1954   } else {
1955     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1956     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1957   }
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1963 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1964 {
1965   PetscErrorCode ierr;
1966 
1967   PetscFunctionBegin;
1968   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1974 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1975 {
1976   PetscErrorCode ierr;
1977   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1978   PetscBLASInt   bnz,one=1;
1979   Mat_SeqBAIJ    *x,*y;
1980 
1981   PetscFunctionBegin;
1982   if (str == SAME_NONZERO_PATTERN) {
1983     PetscScalar alpha = a;
1984     x    = (Mat_SeqBAIJ*)xx->A->data;
1985     y    = (Mat_SeqBAIJ*)yy->A->data;
1986     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1987     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1988     x    = (Mat_SeqBAIJ*)xx->B->data;
1989     y    = (Mat_SeqBAIJ*)yy->B->data;
1990     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1991     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1992   } else {
1993     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1994   }
1995   PetscFunctionReturn(0);
1996 }
1997 
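/*
   A minimal sketch of the fast path above (hypothetical caller code): when Y
   and X share a nonzero structure, Y += a*X reduces to one BLAS axpy over the
   stored values of each sequential block:

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   With any other MatStructure argument the general entry-by-entry
   MatAXPY_Basic() path is taken instead.
*/
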
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2000 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2001 {
2002   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2003   PetscErrorCode ierr;
2004 
2005   PetscFunctionBegin;
2006   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2007   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2013 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2014 {
2015   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2020   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2026 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2027 {
2028   PetscErrorCode ierr;
2029   IS             iscol_local;
2030   PetscInt       csize;
2031 
2032   PetscFunctionBegin;
2033   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2034   if (call == MAT_REUSE_MATRIX) {
2035     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2036     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2037   } else {
2038     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2039   }
2040   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2041   if (call == MAT_INITIAL_MATRIX) {
2042     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2043     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2044   }
2045   PetscFunctionReturn(0);
2046 }
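/*
   A minimal sketch of extracting a parallel submatrix through the routine
   above (hypothetical index sets; MatGetSubMatrix() is the public entry point
   and must be called collectively):

      IS       isrow,iscol;
      Mat      S;
      PetscInt rstart,rend;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);

   The gathered column IS is cached on the result (composed as "ISAllGather")
   so that a later MAT_REUSE_MATRIX call can skip the ISAllGather().
*/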
2047 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2050 /*
2051   Not great since it makes two copies of the submatrix: first a SeqBAIJ one for the
2052   local rows, and then the end result obtained by concatenating the local matrices.
2053   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2054 */
2055 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2056 {
2057   PetscErrorCode ierr;
2058   PetscMPIInt    rank,size;
2059   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2060   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2061   Mat            M,Mreuse;
2062   MatScalar      *vwork,*aa;
2063   MPI_Comm       comm;
2064   IS             isrow_new, iscol_new;
2065   PetscBool      idflag,allrows, allcols;
2066   Mat_SeqBAIJ    *aij;
2067 
2068   PetscFunctionBegin;
2069   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2070   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2071   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2072   /* The compression and expansion should be avoided. It does not flag
2073      errors and it might change the indices; hence it is buggy */
2074   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2075   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2076 
2077   /* Check for special case: each processor gets entire matrix columns */
2078   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2079   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2080   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2081   else allcols = PETSC_FALSE;
2082 
2083   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2084   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2085   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2086   else allrows = PETSC_FALSE;
2087 
2088   if (call ==  MAT_REUSE_MATRIX) {
2089     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2090     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2091     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2092   } else {
2093     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2094   }
2095   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2096   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2097   /*
2098       m - number of local rows
2099       n - number of columns (same on all processors)
2100       rstart - first row in new global matrix generated
2101   */
2102   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2103   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2104   m    = m/bs;
2105   n    = n/bs;
2106 
2107   if (call == MAT_INITIAL_MATRIX) {
2108     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2109     ii  = aij->i;
2110     jj  = aij->j;
2111 
2112     /*
2113         Determine the number of non-zeros in the diagonal and off-diagonal
2114         portions of the matrix in order to do correct preallocation
2115     */
2116 
2117     /* first get start and end of "diagonal" columns */
2118     if (csize == PETSC_DECIDE) {
2119       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2120       if (mglobal == n*bs) { /* square matrix */
2121         nlocal = m;
2122       } else {
2123         nlocal = n/size + ((n % size) > rank);
2124       }
2125     } else {
2126       nlocal = csize/bs;
2127     }
2128     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2129     rstart = rend - nlocal;
2130     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2131 
2132     /* next, compute all the lengths */
2133     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2134     for (i=0; i<m; i++) {
2135       jend = ii[i+1] - ii[i];
2136       olen = 0;
2137       dlen = 0;
2138       for (j=0; j<jend; j++) {
2139         if (*jj < rstart || *jj >= rend) olen++;
2140         else dlen++;
2141         jj++;
2142       }
2143       olens[i] = olen;
2144       dlens[i] = dlen;
2145     }
2146     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2147     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2148     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2149     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2150     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2151   } else {
2152     PetscInt ml,nl;
2153 
2154     M    = *newmat;
2155     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2156     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2157     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2158     /*
2159          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2160        rather than the slower MatSetValues().
2161     */
2162     M->was_assembled = PETSC_TRUE;
2163     M->assembled     = PETSC_FALSE;
2164   }
2165   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2166   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2167   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2168   ii   = aij->i;
2169   jj   = aij->j;
2170   aa   = aij->a;
2171   for (i=0; i<m; i++) {
2172     row   = rstart/bs + i;
2173     nz    = ii[i+1] - ii[i];
2174     cwork = jj;     jj += nz;
2175     vwork = aa;     aa += nz*bs*bs;
2176     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2177   }
2178 
2179   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2180   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2181   *newmat = M;
2182 
2183   /* save submatrix used in processor for next request */
2184   if (call ==  MAT_INITIAL_MATRIX) {
2185     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2186     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2187   }
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatPermute_MPIBAIJ"
2193 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2194 {
2195   MPI_Comm       comm,pcomm;
2196   PetscInt       clocal_size,nrows;
2197   const PetscInt *rows;
2198   PetscMPIInt    size;
2199   IS             crowp,lcolp;
2200   PetscErrorCode ierr;
2201 
2202   PetscFunctionBegin;
2203   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2204   /* make a collective version of 'rowp' */
2205   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2206   if (pcomm==comm) {
2207     crowp = rowp;
2208   } else {
2209     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2210     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2211     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2212     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2213   }
2214   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2215   /* make a local version of 'colp' */
2216   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2217   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2218   if (size==1) {
2219     lcolp = colp;
2220   } else {
2221     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2222   }
2223   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2224   /* now we just get the submatrix */
2225   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2226   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2227   /* clean up */
2228   if (pcomm!=comm) {
2229     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2230   }
2231   if (size>1) {
2232     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2233   }
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2239 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2240 {
2241   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2242   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2243 
2244   PetscFunctionBegin;
2245   if (nghosts) *nghosts = B->nbs;
2246   if (ghosts) *ghosts = baij->garray;
2247   PetscFunctionReturn(0);
2248 }
2249 
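/*
   A minimal sketch of querying the ghost (off-process) block columns exposed
   above (hypothetical caller code):

      PetscInt       nghosts;
      const PetscInt *ghosts;
      ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);

   The returned indices are block indices (baij->garray), one per off-diagonal
   block column, not scalar column indices.
*/
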
2250 #undef __FUNCT__
2251 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2252 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2253 {
2254   Mat            B;
2255   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2256   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2257   Mat_SeqAIJ     *b;
2258   PetscErrorCode ierr;
2259   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2260   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2261   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2262 
2263   PetscFunctionBegin;
2264   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2265   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2266 
2267   /* ----------------------------------------------------------------
2268      Tell every processor the number of nonzeros per row
2269   */
2270   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2271   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2272     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2273   }
2274   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2275   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2276   displs    = recvcounts + size;
2277   for (i=0; i<size; i++) {
2278     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2279     displs[i]     = A->rmap->range[i]/bs;
2280   }
2281 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2282   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2283 #else
2284   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2285 #endif
2286   /* ---------------------------------------------------------------
2287      Create the sequential matrix of the same type as the local block diagonal
2288   */
2289   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2290   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2291   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2292   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2293   b    = (Mat_SeqAIJ*)B->data;
2294 
2295   /*--------------------------------------------------------------------
2296     Copy my part of matrix column indices over
2297   */
2298   sendcount  = ad->nz + bd->nz;
2299   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2300   a_jsendbuf = ad->j;
2301   b_jsendbuf = bd->j;
2302   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2303   cnt        = 0;
2304   for (i=0; i<n; i++) {
2305 
2306     /* put in lower diagonal portion */
2307     m = bd->i[i+1] - bd->i[i];
2308     while (m > 0) {
2309       /* is it above diagonal (in bd (compressed) numbering) */
2310       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2311       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2312       m--;
2313     }
2314 
2315     /* put in diagonal portion */
2316     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2317       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2318     }
2319 
2320     /* put in upper diagonal portion */
2321     while (m-- > 0) {
2322       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2323     }
2324   }
2325   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2326 
2327   /*--------------------------------------------------------------------
2328     Gather all column indices to all processors
2329   */
2330   for (i=0; i<size; i++) {
2331     recvcounts[i] = 0;
2332     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2333       recvcounts[i] += lens[j];
2334     }
2335   }
2336   displs[0] = 0;
2337   for (i=1; i<size; i++) {
2338     displs[i] = displs[i-1] + recvcounts[i-1];
2339   }
2340 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2341   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2342 #else
2343   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2344 #endif
2345   /*--------------------------------------------------------------------
2346     Assemble the matrix into usable form (note the numerical values are not yet set)
2347   */
2348   /* set the b->ilen (length of each row) values */
2349   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2350   /* set the b->i indices */
2351   b->i[0] = 0;
2352   for (i=1; i<=A->rmap->N/bs; i++) {
2353     b->i[i] = b->i[i-1] + lens[i-1];
2354   }
2355   ierr = PetscFree(lens);CHKERRQ(ierr);
2356   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2357   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2358   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2359 
2360   if (A->symmetric) {
2361     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2362   } else if (A->hermitian) {
2363     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2364   } else if (A->structurally_symmetric) {
2365     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2366   }
2367   *newmat = B;
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 #undef __FUNCT__
2372 #define __FUNCT__ "MatSOR_MPIBAIJ"
2373 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2374 {
2375   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2376   PetscErrorCode ierr;
2377   Vec            bb1 = 0;
2378 
2379   PetscFunctionBegin;
2380   if (flag == SOR_APPLY_UPPER) {
2381     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2382     PetscFunctionReturn(0);
2383   }
2384 
2385   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) { /* bb1 is needed whenever the initial guess may be nonzero */
2386     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2387   }
2388 
2389   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2390     if (flag & SOR_ZERO_INITIAL_GUESS) {
2391       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2392       its--;
2393     }
2394 
2395     while (its--) {
2396       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2397       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2398 
2399       /* update rhs: bb1 = bb - B*x */
2400       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2401       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2402 
2403       /* local sweep */
2404       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2405     }
2406   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2407     if (flag & SOR_ZERO_INITIAL_GUESS) {
2408       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2409       its--;
2410     }
2411     while (its--) {
2412       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2413       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2414 
2415       /* update rhs: bb1 = bb - B*x */
2416       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2417       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2418 
2419       /* local sweep */
2420       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2421     }
2422   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2423     if (flag & SOR_ZERO_INITIAL_GUESS) {
2424       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2425       its--;
2426     }
2427     while (its--) {
2428       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2429       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2430 
2431       /* update rhs: bb1 = bb - B*x */
2432       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2433       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2434 
2435       /* local sweep */
2436       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2437     }
2438   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR variant is not supported");
2439 
2440   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2441   PetscFunctionReturn(0);
2442 }
2443 
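/*
   A minimal sketch of exercising the local SOR sweeps above through the
   preconditioner interface (hypothetical KSP/PC setup; PCSOR is the standard
   PETSc wrapper that ends up calling MatSOR()):

      PC pc;
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
      ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   Only the SOR_LOCAL_* variants are supported here; each outer iteration
   refreshes the ghost values of xx and updates the right-hand side with
   bb1 = bb - B*x before the sequential sweep on the diagonal block.
*/
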
2444 #undef __FUNCT__
2445 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2446 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2447 {
2448   PetscErrorCode ierr;
2449   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2450   PetscInt       N,i,*garray = aij->garray;
2451   PetscInt       ib,jb,bs = A->rmap->bs;
2452   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2453   MatScalar      *a_val = a_aij->a;
2454   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2455   MatScalar      *b_val = b_aij->a;
2456   PetscReal      *work;
2457 
2458   PetscFunctionBegin;
2459   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2460   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2461   if (type == NORM_2) {
2462     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2463       for (jb=0; jb<bs; jb++) {
2464         for (ib=0; ib<bs; ib++) {
2465           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2466           a_val++;
2467         }
2468       }
2469     }
2470     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2471       for (jb=0; jb<bs; jb++) {
2472         for (ib=0; ib<bs; ib++) {
2473           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2474           b_val++;
2475         }
2476       }
2477     }
2478   } else if (type == NORM_1) {
2479     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2480       for (jb=0; jb<bs; jb++) {
2481         for (ib=0; ib<bs; ib++) {
2482           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2483           a_val++;
2484         }
2485       }
2486     }
2487     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2488       for (jb=0; jb<bs; jb++) {
2489        for (ib=0; ib<bs; ib++) {
2490           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2491           b_val++;
2492         }
2493       }
2494     }
2495   } else if (type == NORM_INFINITY) {
2496     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2497       for (jb=0; jb<bs; jb++) {
2498         for (ib=0; ib<bs; ib++) {
2499           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2500           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2501           a_val++;
2502         }
2503       }
2504     }
2505     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2506       for (jb=0; jb<bs; jb++) {
2507         for (ib=0; ib<bs; ib++) {
2508           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2509           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2510           b_val++;
2511         }
2512       }
2513     }
2514   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2515   if (type == NORM_INFINITY) {
2516     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2517   } else {
2518     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2519   }
2520   ierr = PetscFree(work);CHKERRQ(ierr);
2521   if (type == NORM_2) {
2522     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2523   }
2524   PetscFunctionReturn(0);
2525 }
2526 
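/*
   A minimal sketch of the public interface to the column-norm computation
   above (hypothetical caller code; the norms array must have room for all
   global columns):

      PetscReal *norms;
      PetscInt  N;
      ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
      ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
      ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
      ierr = PetscFree(norms);CHKERRQ(ierr);

   Diagonal-block columns are addressed through A->cmap->rstart and
   off-diagonal ones through garray; the per-process partial results are then
   combined with MPI_Allreduce (MPIU_MAX for NORM_INFINITY, MPIU_SUM otherwise).
*/
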
2527 #undef __FUNCT__
2528 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2529 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2530 {
2531   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2532   PetscErrorCode ierr;
2533 
2534   PetscFunctionBegin;
2535   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 
2540 /* -------------------------------------------------------------------*/
2541 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2542                                        MatGetRow_MPIBAIJ,
2543                                        MatRestoreRow_MPIBAIJ,
2544                                        MatMult_MPIBAIJ,
2545                                 /* 4*/ MatMultAdd_MPIBAIJ,
2546                                        MatMultTranspose_MPIBAIJ,
2547                                        MatMultTransposeAdd_MPIBAIJ,
2548                                        0,
2549                                        0,
2550                                        0,
2551                                 /*10*/ 0,
2552                                        0,
2553                                        0,
2554                                        MatSOR_MPIBAIJ,
2555                                        MatTranspose_MPIBAIJ,
2556                                 /*15*/ MatGetInfo_MPIBAIJ,
2557                                        MatEqual_MPIBAIJ,
2558                                        MatGetDiagonal_MPIBAIJ,
2559                                        MatDiagonalScale_MPIBAIJ,
2560                                        MatNorm_MPIBAIJ,
2561                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2562                                        MatAssemblyEnd_MPIBAIJ,
2563                                        MatSetOption_MPIBAIJ,
2564                                        MatZeroEntries_MPIBAIJ,
2565                                 /*24*/ MatZeroRows_MPIBAIJ,
2566                                        0,
2567                                        0,
2568                                        0,
2569                                        0,
2570                                 /*29*/ MatSetUp_MPIBAIJ,
2571                                        0,
2572                                        0,
2573                                        0,
2574                                        0,
2575                                 /*34*/ MatDuplicate_MPIBAIJ,
2576                                        0,
2577                                        0,
2578                                        0,
2579                                        0,
2580                                 /*39*/ MatAXPY_MPIBAIJ,
2581                                        MatGetSubMatrices_MPIBAIJ,
2582                                        MatIncreaseOverlap_MPIBAIJ,
2583                                        MatGetValues_MPIBAIJ,
2584                                        MatCopy_MPIBAIJ,
2585                                 /*44*/ 0,
2586                                        MatScale_MPIBAIJ,
2587                                        0,
2588                                        0,
2589                                        MatZeroRowsColumns_MPIBAIJ,
2590                                 /*49*/ 0,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2596                                        0,
2597                                        MatSetUnfactored_MPIBAIJ,
2598                                        MatPermute_MPIBAIJ,
2599                                        MatSetValuesBlocked_MPIBAIJ,
2600                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2601                                        MatDestroy_MPIBAIJ,
2602                                        MatView_MPIBAIJ,
2603                                        0,
2604                                        0,
2605                                 /*64*/ 0,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                 /*74*/ 0,
2616                                        MatFDColoringApply_BAIJ,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                 /*79*/ 0,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                        MatLoad_MPIBAIJ,
2625                                 /*84*/ 0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                 /*89*/ 0,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                 /*94*/ 0,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                 /*99*/ 0,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                 /*104*/0,
2646                                        MatRealPart_MPIBAIJ,
2647                                        MatImaginaryPart_MPIBAIJ,
2648                                        0,
2649                                        0,
2650                                 /*109*/0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2656                                        0,
2657                                        MatGetGhosts_MPIBAIJ,
2658                                        0,
2659                                        0,
2660                                 /*119*/0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                        MatGetMultiProcBlock_MPIBAIJ,
2665                                 /*124*/0,
2666                                        MatGetColumnNorms_MPIBAIJ,
2667                                        MatInvertBlockDiagonal_MPIBAIJ,
2668                                        0,
2669                                        0,
2670                                /*129*/ 0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                /*134*/ 0,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                /*139*/ 0,
2681                                        0,
2682                                        0,
2683                                        MatFDColoringSetUp_MPIXAIJ
2684 };
2685 
2686 #undef __FUNCT__
2687 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2688 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2689 {
2690   PetscFunctionBegin;
2691   *a = ((Mat_MPIBAIJ*)A->data)->A;
2692   PetscFunctionReturn(0);
2693 }
2694 
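/*
   Usage sketch: obtain the sequential matrix that holds this process's diagonal block
   (the public entry point is MatGetDiagonalBlock()):

      Mat Ad;
      ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
*/
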
2695 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2696 
2697 #undef __FUNCT__
2698 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2699 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2700 {
2701   PetscInt       m,rstart,cstart,cend;
2702   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2703   const PetscInt *JJ    =0;
2704   PetscScalar    *values=0;
2705   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2706   PetscErrorCode ierr;
2707 
2708   PetscFunctionBegin;
2709   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2710   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2711   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2712   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2713   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2714   m      = B->rmap->n/bs;
2715   rstart = B->rmap->rstart/bs;
2716   cstart = B->cmap->rstart/bs;
2717   cend   = B->cmap->rend/bs;
2718 
2719   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2720   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2721   for (i=0; i<m; i++) {
2722     nz = ii[i+1] - ii[i];
2723     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2724     nz_max = PetscMax(nz_max,nz);
2725     JJ     = jj + ii[i];
2726     for (j=0; j<nz; j++) {       /* skip block columns before the diagonal portion */
2727       if (*JJ >= cstart) break;
2728       JJ++;
2729     }
2730     d = 0;
2731     for (; j<nz; j++) {          /* count block columns in the diagonal portion [cstart,cend) */
2732       if (*JJ++ >= cend) break;
2733       d++;
2734     }
2735     d_nnz[i] = d;
2736     o_nnz[i] = nz - d;
2737   }
2738   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2739   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2740 
2741   values = (PetscScalar*)V;
2742   if (!values) {
2743     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2744     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2745   }
2746   for (i=0; i<m; i++) {
2747     PetscInt          row    = i + rstart;
2748     PetscInt          ncols  = ii[i+1] - ii[i];
2749     const PetscInt    *icols = jj + ii[i];
2750     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2751       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2752       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2753     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2754       PetscInt j;
2755       for (j=0; j<ncols; j++) {
2756         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2757         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2758       }
2759     }
2760   }
2761 
2762   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2763   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2764   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2765   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 #undef __FUNCT__
2770 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2771 /*@C
2772    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2773    (block compressed row).
2774 
2775    Collective on MPI_Comm
2776 
2777    Input Parameters:
2778 +  A - the matrix
2779 .  bs - the block size
2780 .  i - the indices into j for the start of each local row (starts with zero)
2781 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2782 -  v - optional values in the matrix
2783 
2784    Level: developer
2785 
2786    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2787    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2788    over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2789    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2790    block and the second index is over columns within a block.
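
   For instance (an illustrative sketch, not taken from the PETSc test suite: bs=2 and a process
   owning two local block rows whose blocks sit in block columns {0,1} and {1}):
.vb
      PetscInt    ii[] = {0,2,3};   /* ii[r+1]-ii[r] = number of blocks in block row r */
      PetscInt    jj[] = {0,1,1};   /* sorted block column indices, row by row */
      PetscScalar vv[3*2*2];        /* ii[2]*bs*bs values, filled before the call */
      ierr = MatMPIBAIJSetPreallocationCSR(A,2,ii,jj,vv);CHKERRQ(ierr);
.ve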
2791 
2792 .keywords: matrix, baij, block, compressed row, sparse, parallel
2793 
2794 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2795 @*/
2796 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2797 {
2798   PetscErrorCode ierr;
2799 
2800   PetscFunctionBegin;
2801   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2802   PetscValidType(B,1);
2803   PetscValidLogicalCollectiveInt(B,bs,2);
2804   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2810 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2811 {
2812   Mat_MPIBAIJ    *b;
2813   PetscErrorCode ierr;
2814   PetscInt       i;
2815 
2816   PetscFunctionBegin;
2817   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2818   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2819   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2820   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2821   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2822 
2823   if (d_nnz) {
2824     for (i=0; i<B->rmap->n/bs; i++) {
2825       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2826     }
2827   }
2828   if (o_nnz) {
2829     for (i=0; i<B->rmap->n/bs; i++) {
2830       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2831     }
2832   }
2833 
2834   b      = (Mat_MPIBAIJ*)B->data;
2835   b->bs2 = bs*bs;
2836   b->mbs = B->rmap->n/bs;
2837   b->nbs = B->cmap->n/bs;
2838   b->Mbs = B->rmap->N/bs;
2839   b->Nbs = B->cmap->N/bs;
2840 
2841   for (i=0; i<=b->size; i++) {
2842     b->rangebs[i] = B->rmap->range[i]/bs;
2843   }
2844   b->rstartbs = B->rmap->rstart/bs;
2845   b->rendbs   = B->rmap->rend/bs;
2846   b->cstartbs = B->cmap->rstart/bs;
2847   b->cendbs   = B->cmap->rend/bs;
2848 
2849   if (!B->preallocated) {
2850     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2851     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2852     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2853     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2854     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2855     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2856     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2857     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2858     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2859   }
2860 
2861   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2862   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2863   B->preallocated = PETSC_TRUE;
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2868 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2869 
2870 #undef __FUNCT__
2871 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2872 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2873 {
2874   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2875   PetscErrorCode ierr;
2876   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2877   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2878   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2879 
2880   PetscFunctionBegin;
2881   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2882   ii[0] = 0;
2883   for (i=0; i<M; i++) {
2884     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2885     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2886     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2887     /* remove one from the count if this row of the matrix has a diagonal entry */
2888     for (j=id[i]; j<id[i+1]; j++) {
2889       if (jd[j] == i) {ii[i+1]--;break;}
2890     }
2891   }
2892   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2893   cnt  = 0;
2894   for (i=0; i<M; i++) {
2895     for (j=io[i]; j<io[i+1]; j++) {   /* off-process block columns that precede the local range */
2896       if (garray[jo[j]] > rstart) break;
2897       jj[cnt++] = garray[jo[j]];
2898     }
2899     for (k=id[i]; k<id[i+1]; k++) {   /* local (diagonal-block) columns, omitting the diagonal entry */
2900       if (jd[k] != i) {
2901         jj[cnt++] = rstart + jd[k];
2902       }
2903     }
2904     for (; j<io[i+1]; j++) {          /* remaining off-process block columns */
2905       jj[cnt++] = garray[jo[j]];
2906     }
2907   }
2908   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2909   PetscFunctionReturn(0);
2910 }
2911 
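/*
   Normally reached through MatConvert(), e.g. (a usage sketch):

      ierr = MatConvert(B,MATMPIADJ,MAT_INITIAL_MATRIX,&adj);CHKERRQ(ierr);
*/
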
2912 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2913 
2914 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2915 
2916 #undef __FUNCT__
2917 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2918 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2919 {
2920   PetscErrorCode ierr;
2921   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2922   Mat            B;
2923   Mat_MPIAIJ     *b;
2924 
2925   PetscFunctionBegin;
2926   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2927 
2928   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2929   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2930   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2931   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2932   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2933   b    = (Mat_MPIAIJ*) B->data;
2934 
2935   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2936   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2937   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2938   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2939   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2940   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2941   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2942   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2943   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2944   if (reuse == MAT_REUSE_MATRIX) {
2945     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2946   } else {
2947    *newmat = B;
2948   }
2949   PetscFunctionReturn(0);
2950 }
2951 
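/*
   Also reached through MatConvert(); with MAT_REUSE_MATRIX the header of A is replaced in place
   via MatHeaderReplace().  A usage sketch:

      ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/
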
2952 #if defined(PETSC_HAVE_MUMPS)
2953 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2954 #endif
2955 
2956 /*MC
2957    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2958 
2959    Options Database Keys:
2960 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2961 . -mat_block_size <bs> - set the blocksize used to store the matrix
2962 - -mat_use_hash_table <fact> - use a hash table during matrix assembly, sized by the factor <fact>
2963 
2964   Level: beginner
2965 
2966 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2967 M*/
2968 
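/*
   A typical construction sequence (an illustrative sketch; comm, the sizes, and the block size
   are placeholders):

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/
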
2969 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2970 
2971 #undef __FUNCT__
2972 #define __FUNCT__ "MatCreate_MPIBAIJ"
2973 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2974 {
2975   Mat_MPIBAIJ    *b;
2976   PetscErrorCode ierr;
2977   PetscBool      flg;
2978 
2979   PetscFunctionBegin;
2980   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2981   B->data = (void*)b;
2982 
2983   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2984   B->assembled = PETSC_FALSE;
2985 
2986   B->insertmode = NOT_SET_VALUES;
2987   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2988   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2989 
2990   /* build local table of row and column ownerships */
2991   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
2992 
2993   /* build cache for off array entries formed */
2994   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2995 
2996   b->donotstash  = PETSC_FALSE;
2997   b->colmap      = NULL;
2998   b->garray      = NULL;
2999   b->roworiented = PETSC_TRUE;
3000 
3001   /* stuff used in block assembly */
3002   b->barray = 0;
3003 
3004   /* stuff used for matrix vector multiply */
3005   b->lvec  = 0;
3006   b->Mvctx = 0;
3007 
3008   /* stuff for MatGetRow() */
3009   b->rowindices   = 0;
3010   b->rowvalues    = 0;
3011   b->getrowactive = PETSC_FALSE;
3012 
3013   /* hash table stuff */
3014   b->ht           = 0;
3015   b->hd           = 0;
3016   b->ht_size      = 0;
3017   b->ht_flag      = PETSC_FALSE;
3018   b->ht_fact      = 0;
3019   b->ht_total_ct  = 0;
3020   b->ht_insert_ct = 0;
3021 
3022   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3023   b->ijonly = PETSC_FALSE;
3024 
3025   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3026   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3027   if (flg) {
3028     PetscReal fact = 1.39;
3029     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3030     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3031     if (fact <= 1.0) fact = 1.39;
3032     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3033     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3034   }
3035   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3036 
3037 #if defined(PETSC_HAVE_MUMPS)
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3039 #endif
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3051   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 /*MC
3056    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3057 
3058    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3059    and MATMPIBAIJ otherwise.
3060 
3061    Options Database Keys:
3062 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3063 
3064   Level: beginner
3065 
3066 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3067 M*/
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3071 /*@C
3072    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3073    (block compressed row).  For good matrix assembly performance
3074    the user should preallocate the matrix storage by setting the parameters
3075    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3076    performance can be increased by more than a factor of 50.
3077 
3078    Collective on Mat
3079 
3080    Input Parameters:
3081 +  A - the matrix
3082 .  bs   - size of block
3083 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3084            submatrix  (same for all local rows)
3085 .  d_nnz - array containing the number of block nonzeros in the various block rows
3086            in the diagonal portion of the local submatrix (possibly different for each block
3087            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3088            set it even if it is zero.
3089 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3090            submatrix (same for all local rows).
3091 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3092            off-diagonal portion of the local submatrix (possibly different for
3093            each block row) or NULL.
3094 
3095    If the *_nnz parameter is given then the *_nz parameter is ignored.
3096 
3097    Options Database Keys:
3098 +   -mat_block_size - size of the blocks to use
3099 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly, sized by the factor <fact>
3100 
3101    Notes:
3102    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3103    then it must be used on all processors that share the object for that argument.
3104 
3105    Storage Information:
3106    For a square global matrix we define each processor's diagonal portion
3107    to be its local rows and the corresponding columns (a square submatrix);
3108    each processor's off-diagonal portion encompasses the remainder of the
3109    local matrix (a rectangular submatrix).
3110 
3111    The user can specify preallocated storage for the diagonal part of
3112    the local submatrix with either d_nz or d_nnz (not both).  Set
3113    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3114    memory allocation.  Likewise, specify preallocated storage for the
3115    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3116 
3117    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3118    the figure below we depict these three local rows and all columns (0-11).
3119 
3120 .vb
3121            0 1 2 3 4 5 6 7 8 9 10 11
3122           --------------------------
3123    row 3  |o o o d d d o o o o  o  o
3124    row 4  |o o o d d d o o o o  o  o
3125    row 5  |o o o d d d o o o o  o  o
3126           --------------------------
3127 .ve
3128 
3129    Thus, any entries in the d locations are stored in the d (diagonal)
3130    submatrix, and any entries in the o locations are stored in the
3131    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3132    stored simply in the MATSEQBAIJ format for compressed row storage.
3133 
3134    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3135    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3136    In general, for PDE problems in which most nonzeros are near the diagonal,
3137    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3138    or you will get TERRIBLE performance; see the users' manual chapter on
3139    matrices.
3140 
3141    You can call MatGetInfo() to get information on how effective the preallocation was;
3142    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3143    You can also run with the option -info and look for messages with the string
3144    malloc in them to see if additional memory allocation was needed.
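
   For instance (an illustrative sketch: bs=3 and five local block rows, with the block counts
   chosen arbitrarily):
.vb
      PetscInt d_nnz[] = {3,3,3,2,2};
      PetscInt o_nnz[] = {1,1,2,2,2};
      ierr = MatMPIBAIJSetPreallocation(A,3,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve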
3145 
3146    Level: intermediate
3147 
3148 .keywords: matrix, block, aij, compressed row, sparse, parallel
3149 
3150 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3151 @*/
3152 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3153 {
3154   PetscErrorCode ierr;
3155 
3156   PetscFunctionBegin;
3157   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3158   PetscValidType(B,1);
3159   PetscValidLogicalCollectiveInt(B,bs,2);
3160   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3161   PetscFunctionReturn(0);
3162 }
3163 
3164 #undef __FUNCT__
3165 #define __FUNCT__ "MatCreateBAIJ"
3166 /*@C
3167    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3168    (block compressed row).  For good matrix assembly performance
3169    the user should preallocate the matrix storage by setting the parameters
3170    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3171    performance can be increased by more than a factor of 50.
3172 
3173    Collective on MPI_Comm
3174 
3175    Input Parameters:
3176 +  comm - MPI communicator
3177 .  bs   - size of block
3178 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3179            This value should be the same as the local size used in creating the
3180            y vector for the matrix-vector product y = Ax.
3181 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3182            This value should be the same as the local size used in creating the
3183            x vector for the matrix-vector product y = Ax.
3184 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3185 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3186 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3187            submatrix  (same for all local rows)
3188 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3189            in the diagonal portion of the local submatrix (possibly different for each block
3190            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3191            and set it even if it is zero.
3192 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3193            submatrix (same for all local rows).
3194 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3195            off-diagonal portion of the local submatrix (possibly different for
3196            each block row) or NULL.
3197 
3198    Output Parameter:
3199 .  A - the matrix
3200 
3201    Options Database Keys:
3202 +   -mat_block_size - size of the blocks to use
3203 -   -mat_use_hash_table <fact>
3204 
3205    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3206    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3207    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3208 
3209    Notes:
3210    If the *_nnz parameter is given then the *_nz parameter is ignored
3211 
3212    A nonzero block is any block that has 1 or more nonzeros in it.
3213 
3214    The user MUST specify either the local or global matrix dimensions
3215    (possibly both).
3216 
3217    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3218    then it must be used on all processors that share the object for that argument.
3219 
3220    Storage Information:
3221    For a square global matrix we define each processor's diagonal portion
3222    to be its local rows and the corresponding columns (a square submatrix);
3223    each processor's off-diagonal portion encompasses the remainder of the
3224    local matrix (a rectangular submatrix).
3225 
3226    The user can specify preallocated storage for the diagonal part of
3227    the local submatrix with either d_nz or d_nnz (not both).  Set
3228    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3229    memory allocation.  Likewise, specify preallocated storage for the
3230    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3231 
3232    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3233    the figure below we depict these three local rows and all columns (0-11).
3234 
3235 .vb
3236            0 1 2 3 4 5 6 7 8 9 10 11
3237           --------------------------
3238    row 3  |o o o d d d o o o o  o  o
3239    row 4  |o o o d d d o o o o  o  o
3240    row 5  |o o o d d d o o o o  o  o
3241           --------------------------
3242 .ve
3243 
3244    Thus, any entries in the d locations are stored in the d (diagonal)
3245    submatrix, and any entries in the o locations are stored in the
3246    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3247    stored simply in the MATSEQBAIJ format for compressed row storage.
3248 
3249    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3250    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3251    In general, for PDE problems in which most nonzeros are near the diagonal,
3252    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3253    or you will get TERRIBLE performance; see the users' manual chapter on
3254    matrices.
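
   For instance (an illustrative sketch: 2x2 blocks, local sizes chosen by PETSc, at most 5 diagonal
   and 2 off-diagonal blocks per block row):
.vb
      Mat A;
      ierr = MatCreateBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve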
3255 
3256    Level: intermediate
3257 
3258 .keywords: matrix, block, aij, compressed row, sparse, parallel
3259 
3260 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3261 @*/
3262 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3263 {
3264   PetscErrorCode ierr;
3265   PetscMPIInt    size;
3266 
3267   PetscFunctionBegin;
3268   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3269   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3270   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3271   if (size > 1) {
3272     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3273     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3274   } else {
3275     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3276     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3277   }
3278   PetscFunctionReturn(0);
3279 }
3280 
3281 #undef __FUNCT__
3282 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3283 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3284 {
3285   Mat            mat;
3286   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3287   PetscErrorCode ierr;
3288   PetscInt       len=0;
3289 
3290   PetscFunctionBegin;
3291   *newmat = 0;
3292   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3293   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3294   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3295   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3296 
3297   mat->factortype   = matin->factortype;
3298   mat->preallocated = PETSC_TRUE;
3299   mat->assembled    = PETSC_TRUE;
3300   mat->insertmode   = NOT_SET_VALUES;
3301 
3302   a             = (Mat_MPIBAIJ*)mat->data;
3303   mat->rmap->bs = matin->rmap->bs;
3304   a->bs2        = oldmat->bs2;
3305   a->mbs        = oldmat->mbs;
3306   a->nbs        = oldmat->nbs;
3307   a->Mbs        = oldmat->Mbs;
3308   a->Nbs        = oldmat->Nbs;
3309 
3310   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3311   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3312 
3313   a->size         = oldmat->size;
3314   a->rank         = oldmat->rank;
3315   a->donotstash   = oldmat->donotstash;
3316   a->roworiented  = oldmat->roworiented;
3317   a->rowindices   = 0;
3318   a->rowvalues    = 0;
3319   a->getrowactive = PETSC_FALSE;
3320   a->barray       = 0;
3321   a->rstartbs     = oldmat->rstartbs;
3322   a->rendbs       = oldmat->rendbs;
3323   a->cstartbs     = oldmat->cstartbs;
3324   a->cendbs       = oldmat->cendbs;
3325 
3326   /* hash table stuff */
3327   a->ht           = 0;
3328   a->hd           = 0;
3329   a->ht_size      = 0;
3330   a->ht_flag      = oldmat->ht_flag;
3331   a->ht_fact      = oldmat->ht_fact;
3332   a->ht_total_ct  = 0;
3333   a->ht_insert_ct = 0;
3334 
3335   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3336   if (oldmat->colmap) {
3337 #if defined(PETSC_USE_CTABLE)
3338     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3339 #else
3340     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3341     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3342     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3343 #endif
3344   } else a->colmap = 0;
3345 
3346   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3347     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3348     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3349     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3350   } else a->garray = 0;
3351 
3352   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3353   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3354   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3355   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3356   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3357 
3358   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3359   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3360   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3361   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3362   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3363   *newmat = mat;
3364   PetscFunctionReturn(0);
3365 }
3366 
3367 #undef __FUNCT__
3368 #define __FUNCT__ "MatLoad_MPIBAIJ"
3369 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3370 {
3371   PetscErrorCode ierr;
3372   int            fd;
3373   PetscInt       i,nz,j,rstart,rend;
3374   PetscScalar    *vals,*buf;
3375   MPI_Comm       comm;
3376   MPI_Status     status;
3377   PetscMPIInt    rank,size,maxnz;
3378   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3379   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3380   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3381   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3382   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3383   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3384 
3385   PetscFunctionBegin;
3386   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3387   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3388   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3389   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3390 
3391   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3392   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3393   if (!rank) {
3394     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3395     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3396     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3397   }
3398 
3399   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3400 
3401   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3402   M    = header[1]; N = header[2];
3403 
3404   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3405   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3406   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3407 
3408   /* If global sizes are set, check if they are consistent with that given in the file */
3409   if (sizesset) {
3410     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3411   }
3412   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3413   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3414 
3415   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3416 
3417   /*
3418      This code adds extra rows to make sure the number of rows is
3419      divisible by the blocksize
3420   */
3421   Mbs        = M/bs;
3422   extra_rows = bs - M + bs*Mbs;
3423   if (extra_rows == bs) extra_rows = 0;
3424   else                  Mbs++;
3425   if (extra_rows && !rank) {
3426     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3427   }
3428 
3429   /* determine ownership of all rows */
3430   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3431     mbs = Mbs/size + ((Mbs % size) > rank);
3432     m   = mbs*bs;
3433   } else { /* User set */
3434     m   = newmat->rmap->n;
3435     mbs = m/bs;
3436   }
3437   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3438   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3439 
3440   /* process 0 needs enough room for process with most rows */
3441   if (!rank) {
3442     mmax = rowners[1];
3443     for (i=2; i<=size; i++) {
3444       mmax = PetscMax(mmax,rowners[i]);
3445     }
3446     mmax*=bs;
3447   } else mmax = -1;             /* unused, but compiler warns anyway */
3448 
3449   rowners[0] = 0;
3450   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3451   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3452   rstart = rowners[rank];
3453   rend   = rowners[rank+1];
3454 
3455   /* distribute row lengths to all processors */
3456   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3457   if (!rank) {
3458     mend = m;
3459     if (size == 1) mend = mend - extra_rows;
3460     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3461     for (j=mend; j<m; j++) locrowlens[j] = 1;
3462     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3463     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3464     for (j=0; j<m; j++) {
3465       procsnz[0] += locrowlens[j];
3466     }
3467     for (i=1; i<size; i++) {
3468       mend = browners[i+1] - browners[i];
3469       if (i == size-1) mend = mend - extra_rows;
3470       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3471       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3472       /* calculate the number of nonzeros on each processor */
3473       for (j=0; j<browners[i+1]-browners[i]; j++) {
3474         procsnz[i] += rowlengths[j];
3475       }
3476       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3477     }
3478     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3479   } else {
3480     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3481   }
3482 
3483   if (!rank) {
3484     /* determine max buffer needed and allocate it */
3485     maxnz = procsnz[0];
3486     for (i=1; i<size; i++) {
3487       maxnz = PetscMax(maxnz,procsnz[i]);
3488     }
3489     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3490 
3491     /* read in my part of the matrix column indices  */
3492     nz     = procsnz[0];
3493     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3494     mycols = ibuf;
3495     if (size == 1) nz -= extra_rows;
3496     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3497     if (size == 1) {
3498       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3499     }
3500 
3501     /* read in the parts for all other processes (except the last) and ship them off */
3502     for (i=1; i<size-1; i++) {
3503       nz   = procsnz[i];
3504       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3505       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3506     }
3507     /* read in the stuff for the last proc */
3508     if (size != 1) {
3509       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3510       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3511       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3512       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3513     }
3514     ierr = PetscFree(cols);CHKERRQ(ierr);
3515   } else {
3516     /* determine buffer space needed for message */
3517     nz = 0;
3518     for (i=0; i<m; i++) {
3519       nz += locrowlens[i];
3520     }
3521     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3522     mycols = ibuf;
3523     /* receive message of column indices */
3524     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3525     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3526     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3527   }
3528 
3529   /* loop over local rows, determining number of off diagonal entries */
3530   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3531   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3532   rowcount = 0; nzcount = 0;
3533   for (i=0; i<mbs; i++) {
3534     dcount  = 0;
3535     odcount = 0;
3536     for (j=0; j<bs; j++) {
3537       kmax = locrowlens[rowcount];
3538       for (k=0; k<kmax; k++) {
3539         tmp = mycols[nzcount++]/bs;
3540         if (!mask[tmp]) {
3541           mask[tmp] = 1;
3542           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3543           else masked1[dcount++] = tmp;
3544         }
3545       }
3546       rowcount++;
3547     }
3548 
3549     dlens[i]  = dcount;
3550     odlens[i] = odcount;
3551 
3552     /* zero out the mask elements we set */
3553     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3554     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3555   }
3556 
3557 
3558   if (!sizesset) {
3559     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3560   }
3561   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3562 
3563   if (!rank) {
3564     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3565     /* read in my part of the matrix numerical values  */
3566     nz     = procsnz[0];
3567     vals   = buf;
3568     mycols = ibuf;
3569     if (size == 1) nz -= extra_rows;
3570     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3571     if (size == 1) {
3572       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3573     }
3574 
3575     /* insert into matrix */
3576     jj = rstart*bs;
3577     for (i=0; i<m; i++) {
3578       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3579       mycols += locrowlens[i];
3580       vals   += locrowlens[i];
3581       jj++;
3582     }
3583     /* read in other processors (except the last one) and ship out */
3584     for (i=1; i<size-1; i++) {
3585       nz   = procsnz[i];
3586       vals = buf;
3587       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3588       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3589     }
3590     /* the last proc */
3591     if (size != 1) {
3592       nz   = procsnz[i] - extra_rows;
3593       vals = buf;
3594       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3595       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3596       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3597     }
3598     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3599   } else {
3600     /* receive numeric values */
3601     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3602 
3603     /* receive message of values*/
3604     vals   = buf;
3605     mycols = ibuf;
3606     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3607 
3608     /* insert into matrix */
3609     jj = rstart*bs;
3610     for (i=0; i<m; i++) {
3611       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3612       mycols += locrowlens[i];
3613       vals   += locrowlens[i];
3614       jj++;
3615     }
3616   }
3617   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3618   ierr = PetscFree(buf);CHKERRQ(ierr);
3619   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3620   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3621   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3622   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3623   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3624   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3625   PetscFunctionReturn(0);
3626 }
3627 
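/*
   A typical loading sequence (an illustrative sketch; the file name is hypothetical and
   -matload_block_size <bs> selects the block size):

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
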
3628 #undef __FUNCT__
3629 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3630 /*@
3631    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
3632 
3633    Input Parameters:
3634 +  mat  - the matrix
3635 -  fact - factor
3636 
3637    Not Collective, each process can use a different factor
3638 
3639    Level: advanced
3640 
3641   Notes:
3642    This can also be set by the command line option: -mat_use_hash_table <fact>
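
   For instance (a sketch using the default factor from MatCreate_MPIBAIJ()):
.vb
      ierr = MatMPIBAIJSetHashTableFactor(A,1.39);CHKERRQ(ierr);
.ve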
3643 
3644 .keywords: matrix, hashtable, factor, HT
3645 
3646 .seealso: MatSetOption()
3647 @*/
3648 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3649 {
3650   PetscErrorCode ierr;
3651 
3652   PetscFunctionBegin;
3653   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3654   PetscFunctionReturn(0);
3655 }
3656 
3657 #undef __FUNCT__
3658 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3659 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3660 {
3661   Mat_MPIBAIJ *baij;
3662 
3663   PetscFunctionBegin;
3664   baij          = (Mat_MPIBAIJ*)mat->data;
3665   baij->ht_fact = fact;
3666   PetscFunctionReturn(0);
3667 }
3668 
3669 #undef __FUNCT__
3670 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3671 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3672 {
3673   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3674 
3675   PetscFunctionBegin;
3676   *Ad     = a->A;
3677   *Ao     = a->B;
3678   *colmap = a->garray;
3679   PetscFunctionReturn(0);
3680 }
3681 
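/*
   Usage sketch: fetch the diagonal (Ad) and off-diagonal (Ao) sequential pieces together with the
   map from Ao's local block columns to global block columns:

      Mat            Ad,Ao;
      const PetscInt *colmap;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/
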
3682 /*
3683     Special version for direct calls from Fortran (to eliminate two function call overheads).
3684 */
3685 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3686 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3687 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3688 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3689 #endif
3690 
3691 #undef __FUNCT__
3692 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3693 /*@C
3694   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3695 
3696   Collective on Mat
3697 
3698   Input Parameters:
3699 + mat - the matrix
3700 . min - number of input rows
3701 . im - input rows
3702 . nin - number of input columns
3703 . in - input columns
3704 . v - numerical values input
3705 - addvin - INSERT_VALUES or ADD_VALUES
3706 
3707   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3708 
3709   Level: advanced
3710 
3711 .seealso:   MatSetValuesBlocked()
3712 @*/
3713 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3714 {
3715   /* convert input arguments to C version */
3716   Mat        mat  = *matin;
3717   PetscInt   m    = *min, n = *nin;
3718   InsertMode addv = *addvin;
3719 
3720   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3721   const MatScalar *value;
3722   MatScalar       *barray     = baij->barray;
3723   PetscBool       roworiented = baij->roworiented;
3724   PetscErrorCode  ierr;
3725   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3726   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3727   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3728 
3729   PetscFunctionBegin;
3730   /* tasks normally handled by MatSetValuesBlocked() */
3731   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3732 #if defined(PETSC_USE_DEBUG)
3733   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3734   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3735 #endif
3736   if (mat->assembled) {
3737     mat->was_assembled = PETSC_TRUE;
3738     mat->assembled     = PETSC_FALSE;
3739   }
3740   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3741 
3742 
3743   if (!barray) {
3744     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3745     baij->barray = barray;
3746   }
3747 
3748   if (roworiented) stepval = (n-1)*bs;
3749   else stepval = (m-1)*bs;
3750 
3751   for (i=0; i<m; i++) {
3752     if (im[i] < 0) continue;
3753 #if defined(PETSC_USE_DEBUG)
3754     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3755 #endif
3756     if (im[i] >= rstart && im[i] < rend) {
3757       row = im[i] - rstart;
3758       for (j=0; j<n; j++) {
3759         /* If NumCol = 1 then a copy is not required */
3760         if ((roworiented) && (n == 1)) {
3761           barray = (MatScalar*)v + i*bs2;
3762         } else if ((!roworiented) && (m == 1)) {
3763           barray = (MatScalar*)v + j*bs2;
3764         } else { /* Here a copy is required */
3765           if (roworiented) {
3766             value = v + i*(stepval+bs)*bs + j*bs;
3767           } else {
3768             value = v + j*(stepval+bs)*bs + i*bs;
3769           }
3770           for (ii=0; ii<bs; ii++,value+=stepval) {
3771             for (jj=0; jj<bs; jj++) {
3772               *barray++ = *value++;
3773             }
3774           }
3775           barray -= bs2;
3776         }
3777 
3778         if (in[j] >= cstart && in[j] < cend) {
3779           col  = in[j] - cstart;
3780           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3781         } else if (in[j] < 0) continue;
3782 #if defined(PETSC_USE_DEBUG)
3783         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3784 #endif
3785         else {
3786           if (mat->was_assembled) {
3787             if (!baij->colmap) {
3788               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3789             }
3790 
3791 #if defined(PETSC_USE_DEBUG)
3792 #if defined(PETSC_USE_CTABLE)
3793             { PetscInt data;
3794               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3795               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3796             }
3797 #else
3798             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3799 #endif
3800 #endif
3801 #if defined(PETSC_USE_CTABLE)
3802             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3803             col  = (col - 1)/bs;
3804 #else
3805             col = (baij->colmap[in[j]] - 1)/bs;
3806 #endif
3807             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3808               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3809               col  =  in[j];
3810             }
3811           } else col = in[j];
3812           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3813         }
3814       }
3815     } else {
3816       if (!baij->donotstash) {
3817         if (roworiented) {
3818           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3819         } else {
3820           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3821         }
3822       }
3823     }
3824   }
3825 
3826   /* task normally handled by MatSetValuesBlocked() */
3827   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3828   PetscFunctionReturn(0);
3829 }
3830 
3831 #undef __FUNCT__
3832 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3833 /*@
3834      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3835          CSR format the local rows.
3836 
3837    Collective on MPI_Comm
3838 
3839    Input Parameters:
3840 +  comm - MPI communicator
3841 .  bs - the block size, only a block size of 1 is supported
3842 .  m - number of local rows (Cannot be PETSC_DECIDE)
3843 .  n - This value should be the same as the local size used in creating the
3844        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3845        calculated if N is given). For square matrices n is almost always m.
3846 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3847 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3848 .   i - row indices
3849 .   j - column indices
3850 -   a - matrix values
3851 
3852    Output Parameter:
3853 .   mat - the matrix
3854 
3855    Level: intermediate
3856 
3857    Notes:
3858        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3859      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3860      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3861 
3862      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3863      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3864      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3865      with column-major ordering within blocks.
3866 
3867        The i and j indices are 0 based; the i array holds the offsets of each local row into the j array.
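
   For example (an illustrative sketch: bs=1, one process, two local rows with nonzeros at
   (0,0), (0,1), and (1,1)):
.vb
      PetscInt    i[] = {0,2,3};
      PetscInt    j[] = {0,1,1};
      PetscScalar a[] = {1.0,2.0,3.0};
      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve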
3868 
3869 .keywords: matrix, aij, compressed row, sparse, parallel
3870 
3871 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3872           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3873 @*/
3874 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3875 {
3876   PetscErrorCode ierr;
3877 
3878   PetscFunctionBegin;
3879   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3880   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3881   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3882   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3883   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3884   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3885   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3886   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3887   PetscFunctionReturn(0);
3888 }
3889