
#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>
#include <petscsf.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
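
/*
   Worked example of the garray mapping above (assumed values): garray[]
   translates local block columns of the off-diagonal part B into global block
   columns. With bs = 2 and garray = {3,7}, a local point column idxb[i] = 3
   lies in local block 3/2 = 1 with offset 3%2 = 1, so the global point column
   is bs*garray[1] + 1 = 2*7 + 1 = 15.
*/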

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global number of block columns. (See the
   usage sketch following this routine.)
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
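
/*
   Usage sketch for the colmap created above (gcol, a global point column, is a
   hypothetical input; bs and baij as in MatSetValues_MPIBAIJ() below). Entries
   are stored shifted by one so that 0 can mean "block column not present":

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);CHKERRQ(ierr);
     lcol--;                               (undo the +1 shift; -1 means absent)
   #else
     lcol = baij->colmap[gcol/bs] - 1;     (-1 means absent)
   #endif
     if (lcol >= 0) lcol += gcol % bs;     (local point column within B)
*/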

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

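/*
   The macro above (and its _B_Private twin below) inserts one scalar into a
   block row: a short binary search narrows the window to at most three
   entries, a linear scan locates the block column, and on a miss the later
   bs x bs blocks are shifted up to make room. A minimal sketch of the search
   over a sorted block-column array rp[0..nrow):

     low = 0; high = nrow;
     while (high-low > 3) {                 bisect until the window is tiny
       t = (low+high)/2;
       if (rp[t] > bcol) high = t;
       else              low  = t;
     }
     for (_i=low; _i<high; _i++) {          finish with a linear scan
       if (rp[_i] >= bcol) break;           equality: found; greater: insert here
     }
*/
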
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private macros above */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
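
/*
   Addressing used in the copy above, with assumed sizes: for row-oriented
   input v[] holding an m x n grid of bs x bs blocks, one point row of v has
   n*bs scalars and stepval = (n-1)*bs. Block (i,j) starts at
   v + (i*(stepval+bs) + j)*bs; with bs = 2 and n = 3 that is v + i*12 + j*2,
   and after copying one bs-long block row the pointer advances by
   bs + stepval = 6 scalars to reach the block's next row.
*/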

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
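/*
   HASH() is multiplicative hashing: the key is scaled by the golden-ratio
   conjugate and the fractional part selects a slot. Worked example with
   assumed numbers: size = 1000 and key = 7 give tmp = 7*0.6180339887 =
   4.3262..., fractional part 0.3262..., hence h1 = (PetscInt)(1000*0.3262...)
   = 326. Collisions are resolved by linear probing in the routines below.
*/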
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills in the hash table.
  The table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
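
/*
   Sketch of the lookup that pairs with the table built above (equivalent to
   the probing in MatSetValues_MPIBAIJ_HT(); HT[] holds the 1-based keys and
   HD[] the addresses of the matching bs2-sized blocks):

     idx = h1;
     while (HT[idx] != key) {
       idx = (idx + 1) % ht_size;           linear probe with wrap-around
       if (idx == h1) break;                full cycle: key is not in the table
     }
     on success HD[idx] points at the bs x bs block for (row,col)
*/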

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, so we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
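
/*
   Minimal usage sketch of the two-phase assembly completed above (grow, gcol,
   and val are hypothetical): values destined for rows owned by another process
   are stashed by MatSetValues() and only communicated during assembly.

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   posts the stash sends
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     receives and inserts
*/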

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row  */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* Load up the local column indices. Include entries for every point row, not just one per block
     row, since process 0 cannot reconstruct the per-row indices from block-row data alone. This
     requires more communication, but still no more than that needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
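
/*
   On-disk layout produced by the routine above: a four-entry header
   {MAT_FILE_CLASSID, M, N, global nnz}, then one length per point row, then
   all column indices (row by row, with diagonal-block columns placed between
   the lower and upper off-diagonal ones), then the numerical values in the
   same order; finally the info file receives a "-matload_block_size <bs>"
   hint for MatLoad().
*/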

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1316   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1317   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1318   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1319   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1320   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
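/*
   Note: MatMult_MPIBAIJ() above overlaps communication with computation:
   the scatter of the off-process entries of xx into a->lvec proceeds while
   the diagonal block a->A is applied, and the off-diagonal block a->B is
   applied only after the scatter completes. A minimal usage sketch
   (illustrative only, assuming an assembled MATMPIBAIJ matrix A):

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);    ... dispatches to MatMult_MPIBAIJ() ...
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/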
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1326 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1327 {
1328   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1333   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1334   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1341 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1342 {
1343   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1344   PetscErrorCode ierr;
1345   PetscBool      merged;
1346 
1347   PetscFunctionBegin;
1348   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1349   /* do nondiagonal part */
1350   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1351   if (!merged) {
1352     /* send it on its way */
1353     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1354     /* do local part */
1355     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1356     /* receive remote parts: note this assumes the values are not actually */
1357     /* inserted in yy until the next line */
1358     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1359   } else {
1360     /* do local part */
1361     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1362     /* send it on its way */
1363     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1364     /* the values were actually received in the Begin() but the matching End() must still be called as a no-op */
1365     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366   }
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1372 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1373 {
1374   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   /* do nondiagonal part */
1379   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1380   /* send it on its way */
1381   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1382   /* do local part */
1383   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1384   /* receive remote parts: note this assumes the values are not actually */
1385   /* inserted in zz until the VecScatterEnd() below, which is true for this */
1386   /* implementation but is perhaps not always true. */
1387   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 /*
1392   This only works correctly for square matrices where the subblock A->A is the
1393    diagonal block
1394 */
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1397 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrices where A->A is the diagonal block");
1404   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatScale_MPIBAIJ"
1410 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1411 {
1412   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1417   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1423 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1424 {
1425   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1426   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1427   PetscErrorCode ierr;
1428   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1429   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1430   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1431 
1432   PetscFunctionBegin;
1433   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1434   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1435   mat->getrowactive = PETSC_TRUE;
1436 
1437   if (!mat->rowvalues && (idx || v)) {
1438     /*
1439         allocate enough space to hold information from the longest row.
1440     */
1441     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1442     PetscInt    max = 1,mbs = mat->mbs,tmp;
1443     for (i=0; i<mbs; i++) {
1444       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1445       if (max < tmp) max = tmp;
1446     }
1447     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1448   }
1449   lrow = row - brstart;
1450 
1451   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1452   if (!v)   {pvA = 0; pvB = 0;}
1453   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1454   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1455   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1456   nztot = nzA + nzB;
1457 
1458   cmap = mat->garray;
1459   if (v  || idx) {
1460     if (nztot) {
1461       /* Sort by increasing column numbers, assuming A and B already sorted */
1462       PetscInt imark = -1;
1463       if (v) {
1464         *v = v_p = mat->rowvalues;
1465         for (i=0; i<nzB; i++) {
1466           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1467           else break;
1468         }
1469         imark = i;
1470         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1471         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1472       }
1473       if (idx) {
1474         *idx = idx_p = mat->rowindices;
1475         if (imark > -1) {
1476           for (i=0; i<imark; i++) {
1477             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1478           }
1479         } else {
1480           for (i=0; i<nzB; i++) {
1481             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1482             else break;
1483           }
1484           imark = i;
1485         }
1486         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1487         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1488       }
1489     } else {
1490       if (idx) *idx = 0;
1491       if (v)   *v   = 0;
1492     }
1493   }
1494   *nz  = nztot;
1495   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1496   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1502 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1503 {
1504   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1505 
1506   PetscFunctionBegin;
1507   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1508   baij->getrowactive = PETSC_FALSE;
1509   PetscFunctionReturn(0);
1510 }
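/*
   Note: MatGetRow()/MatRestoreRow() must be called in matching pairs, one
   row at a time, and only for locally owned rows (see the getrowactive
   guard above). A minimal sketch (illustrative only):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols[], vals[] here ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/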
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1514 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1515 {
1516   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1521   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1527 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1528 {
1529   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1530   Mat            A  = a->A,B = a->B;
1531   PetscErrorCode ierr;
1532   PetscReal      isend[5],irecv[5];
1533 
1534   PetscFunctionBegin;
1535   info->block_size = (PetscReal)matin->rmap->bs;
1536 
1537   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1538 
1539   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1540   isend[3] = info->memory;  isend[4] = info->mallocs;
1541 
1542   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1543 
1544   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1545   isend[3] += info->memory;  isend[4] += info->mallocs;
1546 
1547   if (flag == MAT_LOCAL) {
1548     info->nz_used      = isend[0];
1549     info->nz_allocated = isend[1];
1550     info->nz_unneeded  = isend[2];
1551     info->memory       = isend[3];
1552     info->mallocs      = isend[4];
1553   } else if (flag == MAT_GLOBAL_MAX) {
1554     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1555 
1556     info->nz_used      = irecv[0];
1557     info->nz_allocated = irecv[1];
1558     info->nz_unneeded  = irecv[2];
1559     info->memory       = irecv[3];
1560     info->mallocs      = irecv[4];
1561   } else if (flag == MAT_GLOBAL_SUM) {
1562     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1563 
1564     info->nz_used      = irecv[0];
1565     info->nz_allocated = irecv[1];
1566     info->nz_unneeded  = irecv[2];
1567     info->memory       = irecv[3];
1568     info->mallocs      = irecv[4];
1569   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1570   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1571   info->fill_ratio_needed = 0;
1572   info->factor_mallocs    = 0;
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1578 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1579 {
1580   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1581   PetscErrorCode ierr;
1582 
1583   PetscFunctionBegin;
1584   switch (op) {
1585   case MAT_NEW_NONZERO_LOCATIONS:
1586   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1587   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1588   case MAT_KEEP_NONZERO_PATTERN:
1589   case MAT_NEW_NONZERO_LOCATION_ERR:
1590     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1591     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1592     break;
1593   case MAT_ROW_ORIENTED:
1594     a->roworiented = flg;
1595 
1596     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1597     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1598     break;
1599   case MAT_NEW_DIAGONALS:
1600     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1601     break;
1602   case MAT_IGNORE_OFF_PROC_ENTRIES:
1603     a->donotstash = flg;
1604     break;
1605   case MAT_USE_HASH_TABLE:
1606     a->ht_flag = flg;
1607     break;
1608   case MAT_SYMMETRIC:
1609   case MAT_STRUCTURALLY_SYMMETRIC:
1610   case MAT_HERMITIAN:
1611   case MAT_SYMMETRY_ETERNAL:
1612     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1613     break;
1614   default:
1615     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1616   }
1617   PetscFunctionReturn(0);
1618 }
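/*
   Note: options such as MAT_USE_HASH_TABLE and MAT_IGNORE_OFF_PROC_ENTRIES
   handled above only take effect if set before values are inserted. A
   minimal sketch (illustrative only):

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ... MatSetValues()/MatSetValuesBlocked() calls, then assembly ...
*/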
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1622 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1623 {
1624   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1625   Mat_SeqBAIJ    *Aloc;
1626   Mat            B;
1627   PetscErrorCode ierr;
1628   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1629   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1630   MatScalar      *a;
1631 
1632   PetscFunctionBegin;
1633   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1634   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1635     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1636     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1637     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1638     /* Do not know preallocation information, but must set block size */
1639     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1640   } else {
1641     B = *matout;
1642   }
1643 
1644   /* copy over the A part */
1645   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1646   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1647   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1648 
1649   for (i=0; i<mbs; i++) {
1650     rvals[0] = bs*(baij->rstartbs + i);
1651     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1652     for (j=ai[i]; j<ai[i+1]; j++) {
1653       col = (baij->cstartbs+aj[j])*bs;
1654       for (k=0; k<bs; k++) {
1655         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1656 
1657         col++; a += bs;
1658       }
1659     }
1660   }
1661   /* copy over the B part */
1662   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1663   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1664   for (i=0; i<mbs; i++) {
1665     rvals[0] = bs*(baij->rstartbs + i);
1666     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1667     for (j=ai[i]; j<ai[i+1]; j++) {
1668       col = baij->garray[aj[j]]*bs;
1669       for (k=0; k<bs; k++) {
1670         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1671         col++;
1672         a += bs;
1673       }
1674     }
1675   }
1676   ierr = PetscFree(rvals);CHKERRQ(ierr);
1677   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1678   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1679 
1680   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1681   else {
1682     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1683   }
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1689 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1690 {
1691   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1692   Mat            a     = baij->A,b = baij->B;
1693   PetscErrorCode ierr;
1694   PetscInt       s1,s2,s3;
1695 
1696   PetscFunctionBegin;
1697   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1698   if (rr) {
1699     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1700     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1701     /* Overlap communication with computation. */
1702     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1703   }
1704   if (ll) {
1705     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1706     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1707     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1708   }
1709   /* scale the diagonal block */
1710   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1711 
1712   if (rr) {
1713     /* Do a scatter end and then right scale the off-diagonal block */
1714     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1722 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1723 {
1724   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1725   PetscInt      *owners = A->rmap->range;
1726   PetscInt       n      = A->rmap->n;
1727   PetscMPIInt    size   = l->size;
1728   PetscSF        sf;
1729   PetscInt      *lrows;
1730   PetscSFNode   *rrows;
1731   PetscInt       lastidx = -1, r, p = 0, len = 0;
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   /* Create SF where leaves are input rows and roots are owned rows */
1736   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1737   for (r = 0; r < n; ++r) lrows[r] = -1;
1738   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1739   for (r = 0; r < N; ++r) {
1740     const PetscInt idx   = rows[r];
1741     PetscBool      found = PETSC_FALSE;
1742     /* Trick for efficient search when the input rows are (mostly) sorted */
1743     if (lastidx > idx) p = 0;
1744     lastidx = idx;
1745     for (; p < size; ++p) {
1746       if (idx >= owners[p] && idx < owners[p+1]) {
1747         rrows[r].rank  = p;
1748         rrows[r].index = rows[r] - owners[p];
1749         found = PETSC_TRUE;
1750         break;
1751       }
1752     }
1753     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
1754   }
1755   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1756   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1757   /* Collect flags for rows to be zeroed */
1758   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1759   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1760   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1761   /* Compress and put in row numbers */
1762   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1763   /* fix right hand side if needed */
1764   if (x && b) {
1765     const PetscScalar *xx;
1766     PetscScalar       *bb;
1767 
1768     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1769     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1770     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1771     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1772     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1773   }
1774 
1775   /* actually zap the local rows */
1776   /*
1777         Zero the required rows. If the "diagonal block" of the matrix
1778      is square and the user wishes to set the diagonal, we use separate
1779      code so that MatSetValues() is not called for each diagonal entry,
1780      which would allocate new memory with many mallocs and slow things
1781      down.
1782   */
1783   /* must zero l->B before l->A because the (diag != 0.0) case below may put values into l->B */
1784   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
1785   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1786     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
1787   } else if (diag != 0.0) {
1788     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1789     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1790        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1791     for (r = 0; r < len; ++r) {
1792       const PetscInt row = lrows[r] + A->rmap->rstart;
1793       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1794     }
1795     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1797   } else {
1798     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1799   }
1800   ierr = PetscFree(lrows);CHKERRQ(ierr);
1801 
1802   /* only change matrix nonzero state if pattern was allowed to be changed */
1803   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1804     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1805     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1806   }
1807   PetscFunctionReturn(0);
1808 }
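/*
   Note: with a nonzero diag, MatZeroRows() above places diag on the
   diagonal of each zeroed row; when x and b are both provided, b is
   adjusted so that the zeroed equations read diag*x = b. A minimal sketch
   (illustrative only; the global row indices need not be sorted or
   locally owned):

     PetscInt zrows[2] = {0,1};
     ierr = MatZeroRows(A,2,zrows,1.0,x,b);CHKERRQ(ierr);
*/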
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1812 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1813 {
1814   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1815   PetscErrorCode    ierr;
1816   PetscMPIInt       size = l->size;
1817   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count,n = A->rmap->n,lastidx = -1;
1818   PetscInt          *lrows,*owners = A->rmap->range;
1819   PetscSFNode       *rrows;
1820   PetscSF           sf;
1821   const PetscScalar *xx;
1822   PetscScalar       *bb,*mask;
1823   Vec               xmask,lmask;
1824   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1825   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1826   PetscScalar       *aa;
1827 #if defined(PETSC_DEBUG)
1828   PetscBool found = PETSC_FALSE;
1829 #endif
1830 
1831   PetscFunctionBegin;
1832   /* Create SF where leaves are input rows and roots are owned rows */
1833   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1834   for (r = 0; r < n; ++r) lrows[r] = -1;
1835   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1836   for (r = 0; r < N; ++r) {
1837     const PetscInt idx   = rows[r];
1838     PetscBool      found = PETSC_FALSE;
1839     /* Trick for efficient search when the input rows are (mostly) sorted */
1840     if (lastidx > idx) p = 0;
1841     lastidx = idx;
1842     for (; p < size; ++p) {
1843       if (idx >= owners[p] && idx < owners[p+1]) {
1844         rrows[r].rank  = p;
1845         rrows[r].index = rows[r] - owners[p];
1846         found = PETSC_TRUE;
1847         break;
1848       }
1849     }
1850     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
1851   }
1852   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1853   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1854   /* Collect flags for rows to be zeroed */
1855   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1856   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1857   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1858   /* Compress and put in row numbers */
1859   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1860   /* zero diagonal part of matrix */
1861   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1862   /* handle off diagonal part of matrix */
1863   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1864   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1865   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1866   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1867   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1868   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1869   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1870   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1871   if (x) {
1872     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1873     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1874     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1875     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1876   }
1877   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1878   /* remove zeroed rows of off diagonal matrix */
1879   for (i = 0; i < len; ++i) {
1880     row   = lrows[i];
1881     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1882     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1883     for (k = 0; k < count; ++k) {
1884       aa[0] = 0.0;
1885       aa   += bs;
1886     }
1887   }
1888   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1889   for (i = 0; i < l->B->rmap->N; ++i) {
1890     row = i/bs;
1891     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1892       for (k = 0; k < bs; ++k) {
1893         col = bs*baij->j[j] + k;
1894         if (PetscAbsScalar(mask[col])) {
1895           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1896           if (b) bb[i] -= aa[0]*xx[col];
1897           aa[0] = 0.0;
1898         }
1899       }
1900     }
1901   }
1902   if (x) {
1903     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1904     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1905   }
1906   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1907   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1908   ierr = PetscFree(lrows);CHKERRQ(ierr);
1909 
1910   /* only change matrix nonzero state if pattern was allowed to be changed */
1911   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1912     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1913     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1914   }
1915   PetscFunctionReturn(0);
1916 }
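/*
   Note: MatZeroRowsColumns() above additionally zeroes the corresponding
   columns; the mask scatter identifies the ghosted columns to be removed
   from the off-diagonal block, and when x and b are given, b is updated
   with the eliminated column contributions so the reduced system stays
   consistent. Usage mirrors MatZeroRows() (nzrows and zrows are
   illustrative names):

     ierr = MatZeroRowsColumns(A,nzrows,zrows,1.0,x,b);CHKERRQ(ierr);
*/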
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1920 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1921 {
1922   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1923   PetscErrorCode ierr;
1924 
1925   PetscFunctionBegin;
1926   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatEqual_MPIBAIJ"
1934 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1935 {
1936   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1937   Mat            a,b,c,d;
1938   PetscBool      flg;
1939   PetscErrorCode ierr;
1940 
1941   PetscFunctionBegin;
1942   a = matA->A; b = matA->B;
1943   c = matB->A; d = matB->B;
1944 
1945   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1946   if (flg) {
1947     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1948   }
1949   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1950   PetscFunctionReturn(0);
1951 }
1952 
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatCopy_MPIBAIJ"
1955 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1956 {
1957   PetscErrorCode ierr;
1958   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1959   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1960 
1961   PetscFunctionBegin;
1962   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1963   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1964     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1965   } else {
1966     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1967     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1968   }
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1974 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1975 {
1976   PetscErrorCode ierr;
1977 
1978   PetscFunctionBegin;
1979   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1980   PetscFunctionReturn(0);
1981 }
1982 
1983 #undef __FUNCT__
1984 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1985 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1986 {
1987   PetscErrorCode ierr;
1988   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1989   PetscBLASInt   bnz,one=1;
1990   Mat_SeqBAIJ    *x,*y;
1991 
1992   PetscFunctionBegin;
1993   if (str == SAME_NONZERO_PATTERN) {
1994     PetscScalar alpha = a;
1995     x    = (Mat_SeqBAIJ*)xx->A->data;
1996     y    = (Mat_SeqBAIJ*)yy->A->data;
1997     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1998     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1999     x    = (Mat_SeqBAIJ*)xx->B->data;
2000     y    = (Mat_SeqBAIJ*)yy->B->data;
2001     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2002     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2003     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2004   } else {
2005     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2006   }
2007   PetscFunctionReturn(0);
2008 }
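/*
   Note: for SAME_NONZERO_PATTERN, MatAXPY_MPIBAIJ() above applies a single
   BLAS axpy to the stored values of the diagonal and off-diagonal parts,
   computing Y = a*X + Y without touching the nonzero structure. A minimal
   sketch (illustrative only):

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/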
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2012 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2013 {
2014   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2015   PetscErrorCode ierr;
2016 
2017   PetscFunctionBegin;
2018   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2019   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2025 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2026 {
2027   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2028   PetscErrorCode ierr;
2029 
2030   PetscFunctionBegin;
2031   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2032   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 #undef __FUNCT__
2037 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2038 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2039 {
2040   PetscErrorCode ierr;
2041   IS             iscol_local;
2042   PetscInt       csize;
2043 
2044   PetscFunctionBegin;
2045   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2046   if (call == MAT_REUSE_MATRIX) {
2047     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2048     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2049   } else {
2050     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2051   }
2052   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2053   if (call == MAT_INITIAL_MATRIX) {
2054     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2055     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2056   }
2057   PetscFunctionReturn(0);
2058 }
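
/*
   Note: the "ISAllGather" composition above is what enables
   MAT_REUSE_MATRIX: the gathered column IS is stashed on the submatrix at
   the first call and retrieved on later calls. A minimal sketch
   (illustrative only):

     Mat sub = NULL;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... values of A change, nonzero pattern unchanged ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/
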
2059 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2060 #undef __FUNCT__
2061 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2062 /*
2063   Not great since it makes two copies of the submatrix: first a local SeqBAIJ
2064   on each process, and then the end result by concatenating the local matrices.
2065   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2066 */
2067 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2068 {
2069   PetscErrorCode ierr;
2070   PetscMPIInt    rank,size;
2071   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2072   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2073   Mat            M,Mreuse;
2074   MatScalar      *vwork,*aa;
2075   MPI_Comm       comm;
2076   IS             isrow_new, iscol_new;
2077   PetscBool      idflag,allrows, allcols;
2078   Mat_SeqBAIJ    *aij;
2079 
2080   PetscFunctionBegin;
2081   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2082   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2083   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2084   /* The compression and expansion should be avoided. It does not flag
2085      errors and might change the indices, hence buggy */
2086   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2087   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2088 
2089   /* Check for special case: each processor gets entire matrix columns */
2090   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2091   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2092   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2093   else allcols = PETSC_FALSE;
2094 
2095   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2096   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2097   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2098   else allrows = PETSC_FALSE;
2099 
2100   if (call ==  MAT_REUSE_MATRIX) {
2101     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2102     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2103     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2104   } else {
2105     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2106   }
2107   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2108   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2109   /*
2110       m - number of local rows
2111       n - number of columns (same on all processors)
2112       rstart - first row in new global matrix generated
2113   */
2114   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2115   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2116   m    = m/bs;
2117   n    = n/bs;
2118 
2119   if (call == MAT_INITIAL_MATRIX) {
2120     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2121     ii  = aij->i;
2122     jj  = aij->j;
2123 
2124     /*
2125         Determine the number of non-zeros in the diagonal and off-diagonal
2126         portions of the matrix in order to do correct preallocation
2127     */
2128 
2129     /* first get start and end of "diagonal" columns */
2130     if (csize == PETSC_DECIDE) {
2131       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2132       if (mglobal == n*bs) { /* square matrix */
2133         nlocal = m;
2134       } else {
2135         nlocal = n/size + ((n % size) > rank);
2136       }
2137     } else {
2138       nlocal = csize/bs;
2139     }
2140     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2141     rstart = rend - nlocal;
2142     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2143 
2144     /* next, compute all the lengths */
2145     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2146     for (i=0; i<m; i++) {
2147       jend = ii[i+1] - ii[i];
2148       olen = 0;
2149       dlen = 0;
2150       for (j=0; j<jend; j++) {
2151         if (*jj < rstart || *jj >= rend) olen++;
2152         else dlen++;
2153         jj++;
2154       }
2155       olens[i] = olen;
2156       dlens[i] = dlen;
2157     }
2158     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2159     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2160     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2161     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2162     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2163   } else {
2164     PetscInt ml,nl;
2165 
2166     M    = *newmat;
2167     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2168     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2169     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2170     /*
2171          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2172        rather than the slower MatSetValues().
2173     */
2174     M->was_assembled = PETSC_TRUE;
2175     M->assembled     = PETSC_FALSE;
2176   }
2177   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2178   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2179   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2180   ii   = aij->i;
2181   jj   = aij->j;
2182   aa   = aij->a;
2183   for (i=0; i<m; i++) {
2184     row   = rstart/bs + i;
2185     nz    = ii[i+1] - ii[i];
2186     cwork = jj;     jj += nz;
2187     vwork = aa;     aa += nz*bs*bs;
2188     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2189   }
2190 
2191   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2192   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2193   *newmat = M;
2194 
2195   /* save submatrix used in processor for next request */
2196   if (call ==  MAT_INITIAL_MATRIX) {
2197     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2198     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2199   }
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 #undef __FUNCT__
2204 #define __FUNCT__ "MatPermute_MPIBAIJ"
2205 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2206 {
2207   MPI_Comm       comm,pcomm;
2208   PetscInt       clocal_size,nrows;
2209   const PetscInt *rows;
2210   PetscMPIInt    size;
2211   IS             crowp,lcolp;
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2216   /* make a collective version of 'rowp' */
2217   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2218   if (pcomm==comm) {
2219     crowp = rowp;
2220   } else {
2221     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2222     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2223     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2224     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2225   }
2226   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2227   /* make a local version of 'colp' */
2228   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2229   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2230   if (size==1) {
2231     lcolp = colp;
2232   } else {
2233     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2234   }
2235   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2236   /* now we just get the submatrix */
2237   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2238   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2239   /* clean up */
2240   if (pcomm!=comm) {
2241     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2242   }
2243   if (size>1) {
2244     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2245   }
2246   PetscFunctionReturn(0);
2247 }
2248 
2249 #undef __FUNCT__
2250 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2251 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2252 {
2253   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2254   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2255 
2256   PetscFunctionBegin;
2257   if (nghosts) *nghosts = B->nbs;
2258   if (ghosts) *ghosts = baij->garray;
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 #undef __FUNCT__
2263 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2264 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2265 {
2266   Mat            B;
2267   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2268   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2269   Mat_SeqAIJ     *b;
2270   PetscErrorCode ierr;
2271   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2272   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2273   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2274 
2275   PetscFunctionBegin;
2276   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2277   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2278 
2279   /* ----------------------------------------------------------------
2280      Tell every processor the number of nonzeros per row
2281   */
2282   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2283   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2284     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2285   }
2286   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2287   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2288   displs    = recvcounts + size;
2289   for (i=0; i<size; i++) {
2290     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2291     displs[i]     = A->rmap->range[i]/bs;
2292   }
2293 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2294   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2295 #else
2296   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2297 #endif
2298   /* ---------------------------------------------------------------
2299      Create the sequential matrix of the same type as the local block diagonal
2300   */
2301   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2302   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2303   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2304   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2305   b    = (Mat_SeqAIJ*)B->data;
2306 
2307   /*--------------------------------------------------------------------
2308     Copy my part of matrix column indices over
2309   */
2310   sendcount  = ad->nz + bd->nz;
2311   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2312   a_jsendbuf = ad->j;
2313   b_jsendbuf = bd->j;
2314   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2315   cnt        = 0;
2316   for (i=0; i<n; i++) {
2317 
2318     /* put in lower diagonal portion */
2319     m = bd->i[i+1] - bd->i[i];
2320     while (m > 0) {
2321       /* is it above diagonal (in bd (compressed) numbering) */
2322       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2323       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2324       m--;
2325     }
2326 
2327     /* put in diagonal portion */
2328     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2329       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2330     }
2331 
2332     /* put in upper diagonal portion */
2333     while (m-- > 0) {
2334       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2335     }
2336   }
2337   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2338 
2339   /*--------------------------------------------------------------------
2340     Gather all column indices to all processors
2341   */
2342   for (i=0; i<size; i++) {
2343     recvcounts[i] = 0;
2344     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2345       recvcounts[i] += lens[j];
2346     }
2347   }
2348   displs[0] = 0;
2349   for (i=1; i<size; i++) {
2350     displs[i] = displs[i-1] + recvcounts[i-1];
2351   }
2352 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2353   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2354 #else
2355   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2356 #endif
2357   /*--------------------------------------------------------------------
2358     Assemble the matrix into usable form (note the numerical values are not yet set)
2359   */
2360   /* set the b->ilen (length of each row) values */
2361   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2362   /* set the b->i indices */
2363   b->i[0] = 0;
2364   for (i=1; i<=A->rmap->N/bs; i++) {
2365     b->i[i] = b->i[i-1] + lens[i-1];
2366   }
2367   ierr = PetscFree(lens);CHKERRQ(ierr);
2368   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2369   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2371 
2372   if (A->symmetric) {
2373     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2374   } else if (A->hermitian) {
2375     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2376   } else if (A->structurally_symmetric) {
2377     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2378   }
2379   *newmat = B;
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatSOR_MPIBAIJ"
2385 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2386 {
2387   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2388   PetscErrorCode ierr;
2389   Vec            bb1 = 0;
2390 
2391   PetscFunctionBegin;
2392   if (flag == SOR_APPLY_UPPER) {
2393     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2394     PetscFunctionReturn(0);
2395   }
2396 
2397   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2398     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2399   }
2400 
2401   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2402     if (flag & SOR_ZERO_INITIAL_GUESS) {
2403       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2404       its--;
2405     }
2406 
2407     while (its--) {
2408       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2409       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2410 
2411       /* update rhs: bb1 = bb - B*x */
2412       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2413       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2414 
2415       /* local sweep */
2416       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2417     }
2418   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2419     if (flag & SOR_ZERO_INITIAL_GUESS) {
2420       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2421       its--;
2422     }
2423     while (its--) {
2424       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2425       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2426 
2427       /* update rhs: bb1 = bb - B*x */
2428       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2429       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2430 
2431       /* local sweep */
2432       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2433     }
2434   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2435     if (flag & SOR_ZERO_INITIAL_GUESS) {
2436       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2437       its--;
2438     }
2439     while (its--) {
2440       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2441       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2442 
2443       /* update rhs: bb1 = bb - B*x */
2444       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2445       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2446 
2447       /* local sweep */
2448       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2449     }
2450   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel version of SOR not supported");
2451 
2452   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
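/*
   Note: only the SOR_LOCAL_* variants are supported above; each outer
   iteration refreshes the ghost values of xx, forms bb1 = bb - B*x, and
   then sweeps the local diagonal block. A sketch of a direct call
   (illustrative only; normally this path is reached through PCSOR):

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/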
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2458 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2459 {
2460   PetscErrorCode ierr;
2461   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2462   PetscInt       N,i,*garray = aij->garray;
2463   PetscInt       ib,jb,bs = A->rmap->bs;
2464   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2465   MatScalar      *a_val = a_aij->a;
2466   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2467   MatScalar      *b_val = b_aij->a;
2468   PetscReal      *work;
2469 
2470   PetscFunctionBegin;
2471   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2472   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2473   if (type == NORM_2) {
2474     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2475       for (jb=0; jb<bs; jb++) {
2476         for (ib=0; ib<bs; ib++) {
2477           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2478           a_val++;
2479         }
2480       }
2481     }
2482     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2483       for (jb=0; jb<bs; jb++) {
2484         for (ib=0; ib<bs; ib++) {
2485           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2486           b_val++;
2487         }
2488       }
2489     }
2490   } else if (type == NORM_1) {
2491     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2492       for (jb=0; jb<bs; jb++) {
2493         for (ib=0; ib<bs; ib++) {
2494           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2495           a_val++;
2496         }
2497       }
2498     }
2499     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2500       for (jb=0; jb<bs; jb++) {
2501        for (ib=0; ib<bs; ib++) {
2502           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2503           b_val++;
2504         }
2505       }
2506     }
2507   } else if (type == NORM_INFINITY) {
2508     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2509       for (jb=0; jb<bs; jb++) {
2510         for (ib=0; ib<bs; ib++) {
2511           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2512           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2513           a_val++;
2514         }
2515       }
2516     }
2517     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2518       for (jb=0; jb<bs; jb++) {
2519         for (ib=0; ib<bs; ib++) {
2520           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2521           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2522           b_val++;
2523         }
2524       }
2525     }
2526   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2527   if (type == NORM_INFINITY) {
2528     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2529   } else {
2530     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2531   }
2532   ierr = PetscFree(work);CHKERRQ(ierr);
2533   if (type == NORM_2) {
2534     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2535   }
2536   PetscFunctionReturn(0);
2537 }
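/*
   Note: MatGetColumnNorms() above reduces the per-process partial results
   over the communicator, so every rank receives all N global column norms.
   A minimal sketch (illustrative only):

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_1,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/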
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2541 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2542 {
2543   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2544   PetscErrorCode ierr;
2545 
2546   PetscFunctionBegin;
2547   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 
2552 /* -------------------------------------------------------------------*/
2553 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2554                                        MatGetRow_MPIBAIJ,
2555                                        MatRestoreRow_MPIBAIJ,
2556                                        MatMult_MPIBAIJ,
2557                                 /* 4*/ MatMultAdd_MPIBAIJ,
2558                                        MatMultTranspose_MPIBAIJ,
2559                                        MatMultTransposeAdd_MPIBAIJ,
2560                                        0,
2561                                        0,
2562                                        0,
2563                                 /*10*/ 0,
2564                                        0,
2565                                        0,
2566                                        MatSOR_MPIBAIJ,
2567                                        MatTranspose_MPIBAIJ,
2568                                 /*15*/ MatGetInfo_MPIBAIJ,
2569                                        MatEqual_MPIBAIJ,
2570                                        MatGetDiagonal_MPIBAIJ,
2571                                        MatDiagonalScale_MPIBAIJ,
2572                                        MatNorm_MPIBAIJ,
2573                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2574                                        MatAssemblyEnd_MPIBAIJ,
2575                                        MatSetOption_MPIBAIJ,
2576                                        MatZeroEntries_MPIBAIJ,
2577                                 /*24*/ MatZeroRows_MPIBAIJ,
2578                                        0,
2579                                        0,
2580                                        0,
2581                                        0,
2582                                 /*29*/ MatSetUp_MPIBAIJ,
2583                                        0,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                 /*34*/ MatDuplicate_MPIBAIJ,
2588                                        0,
2589                                        0,
2590                                        0,
2591                                        0,
2592                                 /*39*/ MatAXPY_MPIBAIJ,
2593                                        MatGetSubMatrices_MPIBAIJ,
2594                                        MatIncreaseOverlap_MPIBAIJ,
2595                                        MatGetValues_MPIBAIJ,
2596                                        MatCopy_MPIBAIJ,
2597                                 /*44*/ 0,
2598                                        MatScale_MPIBAIJ,
2599                                        0,
2600                                        0,
2601                                        MatZeroRowsColumns_MPIBAIJ,
2602                                 /*49*/ 0,
2603                                        0,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2608                                        0,
2609                                        MatSetUnfactored_MPIBAIJ,
2610                                        MatPermute_MPIBAIJ,
2611                                        MatSetValuesBlocked_MPIBAIJ,
2612                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2613                                        MatDestroy_MPIBAIJ,
2614                                        MatView_MPIBAIJ,
2615                                        0,
2616                                        0,
2617                                 /*64*/ 0,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                 /*74*/ 0,
2628                                        MatFDColoringApply_BAIJ,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                 /*79*/ 0,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                        MatLoad_MPIBAIJ,
2637                                 /*84*/ 0,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                 /*89*/ 0,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                 /*94*/ 0,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                 /*99*/ 0,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                 /*104*/0,
2658                                        MatRealPart_MPIBAIJ,
2659                                        MatImaginaryPart_MPIBAIJ,
2660                                        0,
2661                                        0,
2662                                 /*109*/0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2668                                        0,
2669                                        MatGetGhosts_MPIBAIJ,
2670                                        0,
2671                                        0,
2672                                 /*119*/0,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                        MatGetMultiProcBlock_MPIBAIJ,
2677                                 /*124*/0,
2678                                        MatGetColumnNorms_MPIBAIJ,
2679                                        MatInvertBlockDiagonal_MPIBAIJ,
2680                                        0,
2681                                        0,
2682                                /*129*/ 0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                /*134*/ 0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                /*139*/ 0,
2693                                        0,
2694                                        0,
2695                                        MatFDColoringSetUp_MPIXAIJ
2696 };
2697 
2698 #undef __FUNCT__
2699 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2700 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2701 {
2702   PetscFunctionBegin;
2703   *a = ((Mat_MPIBAIJ*)A->data)->A;
2704   PetscFunctionReturn(0);
2705 }
2706 
2707 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2708 
2709 #undef __FUNCT__
2710 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2711 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2712 {
2713   PetscInt       m,rstart,cstart,cend;
2714   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2715   const PetscInt *JJ    =0;
2716   PetscScalar    *values=0;
2717   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2718   PetscErrorCode ierr;
2719 
2720   PetscFunctionBegin;
2721   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2722   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2723   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2724   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2725   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2726   m      = B->rmap->n/bs;
2727   rstart = B->rmap->rstart/bs;
2728   cstart = B->cmap->rstart/bs;
2729   cend   = B->cmap->rend/bs;
2730 
2731   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2732   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2733   for (i=0; i<m; i++) {
2734     nz = ii[i+1] - ii[i];
2735     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2736     nz_max = PetscMax(nz_max,nz);
2737     JJ     = jj + ii[i];
2738     for (j=0; j<nz; j++) {
2739       if (*JJ >= cstart) break;
2740       JJ++;
2741     }
2742     d = 0;
2743     for (; j<nz; j++) {
2744       if (*JJ++ >= cend) break;
2745       d++;
2746     }
2747     d_nnz[i] = d;
2748     o_nnz[i] = nz - d;
2749   }
2750   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2751   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2752 
2753   values = (PetscScalar*)V;
2754   if (!values) {
2755     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2756     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2757   }
2758   for (i=0; i<m; i++) {
2759     PetscInt          row    = i + rstart;
2760     PetscInt          ncols  = ii[i+1] - ii[i];
2761     const PetscInt    *icols = jj + ii[i];
2762     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2763       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2764       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2765     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2766       PetscInt j;
2767       for (j=0; j<ncols; j++) {
2768         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2769         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2770       }
2771     }
2772   }
2773 
2774   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2775   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2776   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2777   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2783 /*@C
2784    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2785    (block compressed row).
2786 
2787    Collective on MPI_Comm
2788 
2789    Input Parameters:
2790 +  A - the matrix
2791 .  bs - the block size
2792 .  i - the indices into j for the start of each local row (starts with zero)
2793 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2794 -  v - optional values in the matrix
2795 
2796    Level: developer
2797 
2798    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2799    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2800    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2801    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2802    block column and the second index is over columns within a block.
2803 
2804 .keywords: matrix, aij, compressed row, sparse, parallel
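   Example usage, a minimal sketch for one rank owning two block rows with bs=2 and the default
   row-oriented block layout (the CSR arrays are illustrative only; error checking is omitted):

.vb
   Mat         A;
   PetscInt    bs  = 2,k;
   PetscInt    i[] = {0,2,3};      /* block row 0 has 2 block nonzeros, block row 1 has 1 */
   PetscInt    j[] = {0,1,1};      /* sorted global block column indices for each block row */
   PetscScalar v[3*2*2];           /* one bs*bs dense block per block nonzero */
   for (k=0; k<12; k++) v[k] = 1.0;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(A,bs,i,j,v);
.ve
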
2805 
2806 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2807 @*/
2808 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2809 {
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2814   PetscValidType(B,1);
2815   PetscValidLogicalCollectiveInt(B,bs,2);
2816   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 #undef __FUNCT__
2821 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2822 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2823 {
2824   Mat_MPIBAIJ    *b;
2825   PetscErrorCode ierr;
2826   PetscInt       i;
2827 
2828   PetscFunctionBegin;
2829   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2830   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2831   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2832   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2833 
2834   if (d_nnz) {
2835     for (i=0; i<B->rmap->n/bs; i++) {
2836       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2837     }
2838   }
2839   if (o_nnz) {
2840     for (i=0; i<B->rmap->n/bs; i++) {
2841       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2842     }
2843   }
2844 
2845   b      = (Mat_MPIBAIJ*)B->data;
2846   b->bs2 = bs*bs;
2847   b->mbs = B->rmap->n/bs;
2848   b->nbs = B->cmap->n/bs;
2849   b->Mbs = B->rmap->N/bs;
2850   b->Nbs = B->cmap->N/bs;
2851 
2852   for (i=0; i<=b->size; i++) {
2853     b->rangebs[i] = B->rmap->range[i]/bs;
2854   }
2855   b->rstartbs = B->rmap->rstart/bs;
2856   b->rendbs   = B->rmap->rend/bs;
2857   b->cstartbs = B->cmap->rstart/bs;
2858   b->cendbs   = B->cmap->rend/bs;
2859 
2860   if (!B->preallocated) {
2861     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2862     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2863     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2864     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2865     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2866     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2867     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2868     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2869     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2870   }
2871 
2872   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2873   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2874   B->preallocated = PETSC_TRUE;
2875   PetscFunctionReturn(0);
2876 }
2877 
2878 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2879 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2880 
2881 #undef __FUNCT__
2882 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2883 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2884 {
2885   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2886   PetscErrorCode ierr;
2887   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2888   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2889   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2890 
2891   PetscFunctionBegin;
2892   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2893   ii[0] = 0;
2894   for (i=0; i<M; i++) {
2895     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2896     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2897     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2898     /* remove one from the count if this row of the matrix has a diagonal entry */
2899     for (j=id[i]; j<id[i+1]; j++) {
2900       if (jd[j] == i) {ii[i+1]--;break;}
2901     }
2902   }
2903   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2904   cnt  = 0;
2905   for (i=0; i<M; i++) {
2906     for (j=io[i]; j<io[i+1]; j++) {
2907       if (garray[jo[j]] > rstart) break;
2908       jj[cnt++] = garray[jo[j]];
2909     }
2910     for (k=id[i]; k<id[i+1]; k++) {
2911       if (jd[k] != i) {
2912         jj[cnt++] = rstart + jd[k];
2913       }
2914     }
2915     for (; j<io[i+1]; j++) {
2916       jj[cnt++] = garray[jo[j]];
2917     }
2918   }
2919   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2920   PetscFunctionReturn(0);
2921 }
2922 
2923 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2924 
2925 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2926 
2927 #undef __FUNCT__
2928 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2929 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2930 {
2931   PetscErrorCode ierr;
2932   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2933   Mat            B;
2934   Mat_MPIAIJ     *b;
2935 
2936   PetscFunctionBegin;
2937   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2938 
2939   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2940   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2941   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2942   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2943   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2944   b    = (Mat_MPIAIJ*) B->data;
2945 
2946   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2947   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2948   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2949   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2950   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2951   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2952   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2953   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2954   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2955   if (reuse == MAT_REUSE_MATRIX) {
2956     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2957   } else {
2958    *newmat = B;
2959   }
2960   PetscFunctionReturn(0);
2961 }
2962 
2963 #if defined(PETSC_HAVE_MUMPS)
2964 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2965 #endif
2966 
2967 /*MC
2968    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2969 
2970    Options Database Keys:
2971 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2972 . -mat_block_size <bs> - set the blocksize used to store the matrix
2973 - -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> scales the table size (see MatMPIBAIJSetHashTableFactor())
2974 
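  Notes: a minimal sketch of obtaining this matrix type at runtime (assuming the option
  -mat_type mpibaij is given on the command line):

.vb
   Mat      A;
   PetscInt M = 128,N = 128;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetFromOptions(A);
.ve
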
2975   Level: beginner
2976 
2977 .seealso: MatCreateBAIJ()
2978 M*/
2979 
2980 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2981 
2982 #undef __FUNCT__
2983 #define __FUNCT__ "MatCreate_MPIBAIJ"
2984 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2985 {
2986   Mat_MPIBAIJ    *b;
2987   PetscErrorCode ierr;
2988   PetscBool      flg;
2989 
2990   PetscFunctionBegin;
2991   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2992   B->data = (void*)b;
2993 
2994   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2995   B->assembled = PETSC_FALSE;
2996 
2997   B->insertmode = NOT_SET_VALUES;
2998   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2999   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3000 
3001   /* build local table of row and column ownerships */
3002   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
3003 
3004   /* build cache for off array entries formed */
3005   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3006 
3007   b->donotstash  = PETSC_FALSE;
3008   b->colmap      = NULL;
3009   b->garray      = NULL;
3010   b->roworiented = PETSC_TRUE;
3011 
3012   /* stuff used in block assembly */
3013   b->barray = 0;
3014 
3015   /* stuff used for matrix vector multiply */
3016   b->lvec  = 0;
3017   b->Mvctx = 0;
3018 
3019   /* stuff for MatGetRow() */
3020   b->rowindices   = 0;
3021   b->rowvalues    = 0;
3022   b->getrowactive = PETSC_FALSE;
3023 
3024   /* hash table stuff */
3025   b->ht           = 0;
3026   b->hd           = 0;
3027   b->ht_size      = 0;
3028   b->ht_flag      = PETSC_FALSE;
3029   b->ht_fact      = 0;
3030   b->ht_total_ct  = 0;
3031   b->ht_insert_ct = 0;
3032 
3033   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3034   b->ijonly = PETSC_FALSE;
3035 
3036   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3037   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3038   if (flg) {
3039     PetscReal fact = 1.39;
3040     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3041     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3042     if (fact <= 1.0) fact = 1.39;
3043     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3044     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3045   }
3046   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3047 
3048 #if defined(PETSC_HAVE_MUMPS)
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3050 #endif
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3062   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3063   PetscFunctionReturn(0);
3064 }
3065 
3066 /*MC
3067    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3068 
3069    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3070    and MATMPIBAIJ otherwise.
3071 
3072    Options Database Keys:
3073 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3074 
3075   Level: beginner
3076 
3077 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3078 M*/
3079 
3080 #undef __FUNCT__
3081 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3082 /*@C
3083    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3084    (block compressed row).  For good matrix assembly performance
3085    the user should preallocate the matrix storage by setting the parameters
3086    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3087    performance can be increased by more than a factor of 50.
3088 
3089    Collective on Mat
3090 
3091    Input Parameters:
3092 +  A - the matrix
3093 .  bs   - size of block
3094 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3095            submatrix  (same for all local rows)
3096 .  d_nnz - array containing the number of block nonzeros in the various block rows
3097            in the diagonal portion of the local submatrix (possibly different for each block
3098            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3099            set it even if it is zero.
3100 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3101            submatrix (same for all local rows).
3102 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3103            off-diagonal portion of the local submatrix (possibly different for
3104            each block row) or NULL.
3105 
3106    If the *_nnz parameter is given then the *_nz parameter is ignored
3107 
3108    Options Database Keys:
3109 +   -mat_block_size - size of the blocks to use
3110 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> scales the table size
3111 
3112    Notes:
3113    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3114    then it must be used on all processors that share the object for that argument.
3115 
3116    Storage Information:
3117    For a square global matrix we define each processor's diagonal portion
3118    to be its local rows and the corresponding columns (a square submatrix);
3119    each processor's off-diagonal portion encompasses the remainder of the
3120    local matrix (a rectangular submatrix).
3121 
3122    The user can specify preallocated storage for the diagonal part of
3123    the local submatrix with either d_nz or d_nnz (not both).  Set
3124    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3125    memory allocation.  Likewise, specify preallocated storage for the
3126    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3127 
3128    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3129    the figure below we depict these three local rows and all columns (0-11).
3130 
3131 .vb
3132            0 1 2 3 4 5 6 7 8 9 10 11
3133           --------------------------
3134    row 3  |o o o d d d o o o o  o  o
3135    row 4  |o o o d d d o o o o  o  o
3136    row 5  |o o o d d d o o o o  o  o
3137           --------------------------
3138 .ve
3139 
3140    Thus, any entries in the d locations are stored in the d (diagonal)
3141    submatrix, and any entries in the o locations are stored in the
3142    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3143    stored simply in the MATSEQBAIJ format for compressed row storage.
3144 
3145    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3146    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3147    In general, for PDE problems in which most nonzeros are near the diagonal,
3148    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3149    or you will get TERRIBLE performance; see the users' manual chapter on
3150    matrices.
3151 
3152    You can call MatGetInfo() to get information on how effective the preallocation was;
3153    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3154    You can also run with the option -info and look for messages with the string
3155    malloc in them to see if additional memory allocation was needed.
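
   Example usage, a minimal sketch with bs=2 and three block rows on each process (the
   preallocation counts are illustrative only):

.vb
   Mat      A;
   PetscInt bs = 2,d_nnz[3] = {2,3,2},o_nnz[3] = {1,0,1};
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,3*bs,3*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve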
3156 
3157    Level: intermediate
3158 
3159 .keywords: matrix, block, aij, compressed row, sparse, parallel
3160 
3161 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3162 @*/
3163 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3164 {
3165   PetscErrorCode ierr;
3166 
3167   PetscFunctionBegin;
3168   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3169   PetscValidType(B,1);
3170   PetscValidLogicalCollectiveInt(B,bs,2);
3171   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3172   PetscFunctionReturn(0);
3173 }
3174 
3175 #undef __FUNCT__
3176 #define __FUNCT__ "MatCreateBAIJ"
3177 /*@C
3178    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3179    (block compressed row).  For good matrix assembly performance
3180    the user should preallocate the matrix storage by setting the parameters
3181    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3182    performance can be increased by more than a factor of 50.
3183 
3184    Collective on MPI_Comm
3185 
3186    Input Parameters:
3187 +  comm - MPI communicator
3188 .  bs   - size of block
3189 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3190            This value should be the same as the local size used in creating the
3191            y vector for the matrix-vector product y = Ax.
3192 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3193            This value should be the same as the local size used in creating the
3194            x vector for the matrix-vector product y = Ax.
3195 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3196 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3197 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3198            submatrix  (same for all local rows)
3199 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3200            in the diagonal portion of the local submatrix (possibly different for each block
3201            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3202            and set it even if it is zero.
3203 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3204            submatrix (same for all local rows).
3205 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3206            off-diagonal portion of the local submatrix (possibly different for
3207            each block row) or NULL.
3208 
3209    Output Parameter:
3210 .  A - the matrix
3211 
3212    Options Database Keys:
3213 +   -mat_block_size - size of the blocks to use
3214 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly; <fact> scales the table size
3215 
3216    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3217    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3218    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3219 
3220    Notes:
3221    If the *_nnz parameter is given then the *_nz parameter is ignored
3222 
3223    A nonzero block is any block that has 1 or more nonzeros in it
3224 
3225    The user MUST specify either the local or global matrix dimensions
3226    (possibly both).
3227 
3228    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3229    then it must be used on all processors that share the object for that argument.
3230 
3231    Storage Information:
3232    For a square global matrix we define each processor's diagonal portion
3233    to be its local rows and the corresponding columns (a square submatrix);
3234    each processor's off-diagonal portion encompasses the remainder of the
3235    local matrix (a rectangular submatrix).
3236 
3237    The user can specify preallocated storage for the diagonal part of
3238    the local submatrix with either d_nz or d_nnz (not both).  Set
3239    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3240    memory allocation.  Likewise, specify preallocated storage for the
3241    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3242 
3243    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3244    the figure below we depict these three local rows and all columns (0-11).
3245 
3246 .vb
3247            0 1 2 3 4 5 6 7 8 9 10 11
3248           --------------------------
3249    row 3  |o o o d d d o o o o  o  o
3250    row 4  |o o o d d d o o o o  o  o
3251    row 5  |o o o d d d o o o o  o  o
3252           --------------------------
3253 .ve
3254 
3255    Thus, any entries in the d locations are stored in the d (diagonal)
3256    submatrix, and any entries in the o locations are stored in the
3257    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3258    stored simply in the MATSEQBAIJ format for compressed row storage.
3259 
3260    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3261    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3262    In general, for PDE problems in which most nonzeros are near the diagonal,
3263    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3264    or you will get TERRIBLE performance; see the users' manual chapter on
3265    matrices.
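
   Example usage, a minimal sketch creating a square matrix with bs=3, one block row (three rows)
   per process, and uniform block preallocation (the counts are illustrative only):

.vb
   Mat A;
   MatCreateBAIJ(PETSC_COMM_WORLD,3,3,3,PETSC_DETERMINE,PETSC_DETERMINE,2,NULL,1,NULL,&A);
.ve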
3266 
3267    Level: intermediate
3268 
3269 .keywords: matrix, block, aij, compressed row, sparse, parallel
3270 
3271 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3272 @*/
3273 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3274 {
3275   PetscErrorCode ierr;
3276   PetscMPIInt    size;
3277 
3278   PetscFunctionBegin;
3279   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3280   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3281   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3282   if (size > 1) {
3283     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3284     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3285   } else {
3286     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3287     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3288   }
3289   PetscFunctionReturn(0);
3290 }
3291 
3292 #undef __FUNCT__
3293 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3294 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3295 {
3296   Mat            mat;
3297   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3298   PetscErrorCode ierr;
3299   PetscInt       len=0;
3300 
3301   PetscFunctionBegin;
3302   *newmat = 0;
3303   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3304   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3305   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3306   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3307 
3308   mat->factortype   = matin->factortype;
3309   mat->preallocated = PETSC_TRUE;
3310   mat->assembled    = PETSC_TRUE;
3311   mat->insertmode   = NOT_SET_VALUES;
3312 
3313   a             = (Mat_MPIBAIJ*)mat->data;
3314   mat->rmap->bs = matin->rmap->bs;
3315   a->bs2        = oldmat->bs2;
3316   a->mbs        = oldmat->mbs;
3317   a->nbs        = oldmat->nbs;
3318   a->Mbs        = oldmat->Mbs;
3319   a->Nbs        = oldmat->Nbs;
3320 
3321   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3322   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3323 
3324   a->size         = oldmat->size;
3325   a->rank         = oldmat->rank;
3326   a->donotstash   = oldmat->donotstash;
3327   a->roworiented  = oldmat->roworiented;
3328   a->rowindices   = 0;
3329   a->rowvalues    = 0;
3330   a->getrowactive = PETSC_FALSE;
3331   a->barray       = 0;
3332   a->rstartbs     = oldmat->rstartbs;
3333   a->rendbs       = oldmat->rendbs;
3334   a->cstartbs     = oldmat->cstartbs;
3335   a->cendbs       = oldmat->cendbs;
3336 
3337   /* hash table stuff */
3338   a->ht           = 0;
3339   a->hd           = 0;
3340   a->ht_size      = 0;
3341   a->ht_flag      = oldmat->ht_flag;
3342   a->ht_fact      = oldmat->ht_fact;
3343   a->ht_total_ct  = 0;
3344   a->ht_insert_ct = 0;
3345 
3346   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3347   if (oldmat->colmap) {
3348 #if defined(PETSC_USE_CTABLE)
3349     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3350 #else
3351     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3352     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3353     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3354 #endif
3355   } else a->colmap = 0;
3356 
3357   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3358     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3359     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3360     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3361   } else a->garray = 0;
3362 
3363   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3364   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3365   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3366   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3367   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3368 
3369   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3370   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3371   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3372   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3373   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3374   *newmat = mat;
3375   PetscFunctionReturn(0);
3376 }
3377 
3378 #undef __FUNCT__
3379 #define __FUNCT__ "MatLoad_MPIBAIJ"
3380 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3381 {
3382   PetscErrorCode ierr;
3383   int            fd;
3384   PetscInt       i,nz,j,rstart,rend;
3385   PetscScalar    *vals,*buf;
3386   MPI_Comm       comm;
3387   MPI_Status     status;
3388   PetscMPIInt    rank,size,maxnz;
3389   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3390   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3391   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3392   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3393   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3394   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3395 
3396   PetscFunctionBegin;
3397   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3398   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3399   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3400   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3401 
3402   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3403   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3404   if (!rank) {
3405     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3406     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3407     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3408   }
3409 
3410   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3411 
3412   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3413   M    = header[1]; N = header[2];
3414 
3415   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3416   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3417   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3418 
3419   /* If global sizes are set, check if they are consistent with that given in the file */
3420   if (sizesset) {
3421     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3422   }
3423   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
3424   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
3425 
3426   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3427 
3428   /*
3429      This code adds extra rows to make sure the number of rows is
3430      divisible by the blocksize
3431   */
3432   Mbs        = M/bs;
3433   extra_rows = bs - M + bs*Mbs;
3434   if (extra_rows == bs) extra_rows = 0;
3435   else                  Mbs++;
3436   if (extra_rows && !rank) {
3437     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3438   }
3439 
3440   /* determine ownership of all rows */
3441   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3442     mbs = Mbs/size + ((Mbs % size) > rank);
3443     m   = mbs*bs;
3444   } else { /* User set */
3445     m   = newmat->rmap->n;
3446     mbs = m/bs;
3447   }
3448   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3449   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3450 
3451   /* process 0 needs enough room for process with most rows */
3452   if (!rank) {
3453     mmax = rowners[1];
3454     for (i=2; i<=size; i++) {
3455       mmax = PetscMax(mmax,rowners[i]);
3456     }
3457     mmax*=bs;
3458   } else mmax = -1;             /* unused, but compiler warns anyway */
3459 
3460   rowners[0] = 0;
3461   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3462   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3463   rstart = rowners[rank];
3464   rend   = rowners[rank+1];
3465 
3466   /* distribute row lengths to all processors */
3467   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3468   if (!rank) {
3469     mend = m;
3470     if (size == 1) mend = mend - extra_rows;
3471     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3472     for (j=mend; j<m; j++) locrowlens[j] = 1;
3473     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3474     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3475     for (j=0; j<m; j++) {
3476       procsnz[0] += locrowlens[j];
3477     }
3478     for (i=1; i<size; i++) {
3479       mend = browners[i+1] - browners[i];
3480       if (i == size-1) mend = mend - extra_rows;
3481       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3482       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3483       /* calculate the number of nonzeros on each processor */
3484       for (j=0; j<browners[i+1]-browners[i]; j++) {
3485         procsnz[i] += rowlengths[j];
3486       }
3487       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3488     }
3489     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3490   } else {
3491     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3492   }
3493 
3494   if (!rank) {
3495     /* determine max buffer needed and allocate it */
3496     maxnz = procsnz[0];
3497     for (i=1; i<size; i++) {
3498       maxnz = PetscMax(maxnz,procsnz[i]);
3499     }
3500     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3501 
3502     /* read in my part of the matrix column indices  */
3503     nz     = procsnz[0];
3504     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3505     mycols = ibuf;
3506     if (size == 1) nz -= extra_rows;
3507     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3508     if (size == 1) {
3509       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3510     }
3511 
3512     /* read in everyone else's part (except the last proc's) and ship it off */
3513     for (i=1; i<size-1; i++) {
3514       nz   = procsnz[i];
3515       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3516       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3517     }
3518     /* read in the stuff for the last proc */
3519     if (size != 1) {
3520       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3521       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3522       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3523       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3524     }
3525     ierr = PetscFree(cols);CHKERRQ(ierr);
3526   } else {
3527     /* determine buffer space needed for message */
3528     nz = 0;
3529     for (i=0; i<m; i++) {
3530       nz += locrowlens[i];
3531     }
3532     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3533     mycols = ibuf;
3534     /* receive message of column indices */
3535     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3536     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3537     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3538   }
3539 
3540   /* loop over local rows, determining number of off diagonal entries */
3541   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3542   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3543   rowcount = 0; nzcount = 0;
3544   for (i=0; i<mbs; i++) {
3545     dcount  = 0;
3546     odcount = 0;
3547     for (j=0; j<bs; j++) {
3548       kmax = locrowlens[rowcount];
3549       for (k=0; k<kmax; k++) {
3550         tmp = mycols[nzcount++]/bs;
3551         if (!mask[tmp]) {
3552           mask[tmp] = 1;
3553           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3554           else masked1[dcount++] = tmp;
3555         }
3556       }
3557       rowcount++;
3558     }
3559 
3560     dlens[i]  = dcount;
3561     odlens[i] = odcount;
3562 
3563     /* zero out the mask elements we set */
3564     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3565     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3566   }
3567 
3568 
3569   if (!sizesset) {
3570     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3571   }
3572   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3573 
3574   if (!rank) {
3575     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3576     /* read in my part of the matrix numerical values  */
3577     nz     = procsnz[0];
3578     vals   = buf;
3579     mycols = ibuf;
3580     if (size == 1) nz -= extra_rows;
3581     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3582     if (size == 1) {
3583       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3584     }
3585 
3586     /* insert into matrix */
3587     jj = rstart*bs;
3588     for (i=0; i<m; i++) {
3589       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3590       mycols += locrowlens[i];
3591       vals   += locrowlens[i];
3592       jj++;
3593     }
3594     /* read in other processors (except the last one) and ship out */
3595     for (i=1; i<size-1; i++) {
3596       nz   = procsnz[i];
3597       vals = buf;
3598       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3599       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3600     }
3601     /* the last proc */
3602     if (size != 1) {
3603       nz   = procsnz[size-1] - extra_rows;  /* i == size-1 after the loop above; the extra rows are not on the disk */
3604       vals = buf;
3605       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3606       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3607       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3608     }
3609     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3610   } else {
3611     /* receive numeric values */
3612     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3613 
3614     /* receive message of values */
3615     vals   = buf;
3616     mycols = ibuf;
3617     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3618 
3619     /* insert into matrix */
3620     jj = rstart*bs;
3621     for (i=0; i<m; i++) {
3622       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3623       mycols += locrowlens[i];
3624       vals   += locrowlens[i];
3625       jj++;
3626     }
3627   }
3628   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3629   ierr = PetscFree(buf);CHKERRQ(ierr);
3630   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3631   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3632   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3633   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3634   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3635   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636   PetscFunctionReturn(0);
3637 }
3638 
3639 #undef __FUNCT__
3640 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3641 /*@
3642    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table used during matrix assembly.
3643 
3644    Not Collective, each process can use a different factor
3645 
3646    Input Parameters:
3647 +  mat  - the matrix
3648 -  fact - the hash table expansion factor; must be greater than 1.0 (the default is 1.39)
3649 
3650    Level: advanced
3651 
3652   Notes:
3653    This can also be set by the command line option: -mat_use_hash_table <fact>
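
   For example, a minimal sketch assuming an already created MPIBAIJ matrix A (1.39 is the
   default factor used when none is supplied):

.vb
   MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(A,1.6);
.ve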
3654 
3655 .keywords: matrix, hashtable, factor, HT
3656 
3657 .seealso: MatSetOption()
3658 @*/
3659 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3660 {
3661   PetscErrorCode ierr;
3662 
3663   PetscFunctionBegin;
3664   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3665   PetscFunctionReturn(0);
3666 }
3667 
3668 #undef __FUNCT__
3669 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3670 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3671 {
3672   Mat_MPIBAIJ *baij;
3673 
3674   PetscFunctionBegin;
3675   baij          = (Mat_MPIBAIJ*)mat->data;
3676   baij->ht_fact = fact;
3677   PetscFunctionReturn(0);
3678 }
3679 
3680 #undef __FUNCT__
3681 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3682 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3683 {
3684   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3685 
3686   PetscFunctionBegin;
3687   *Ad     = a->A;
3688   *Ao     = a->B;
3689   *colmap = a->garray;
3690   PetscFunctionReturn(0);
3691 }
3692 
3693 /*
3694     Special version for direct calls from Fortran (to eliminate two function call overheads)
3695 */
3696 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3697 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3698 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3699 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3700 #endif
3701 
3702 #undef __FUNCT__
3703 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3704 /*@C
3705   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3706 
3707   Collective on Mat
3708 
3709   Input Parameters:
3710 + mat - the matrix
3711 . min - number of input rows
3712 . im - input rows
3713 . nin - number of input columns
3714 . in - input columns
3715 . v - numerical values input
3716 - addvin - INSERT_VALUES or ADD_VALUES
3717 
3718   Notes: This contains a complete copy of the body of MatSetValuesBlocked_MPIBAIJ(); the duplication is unfortunate but avoids two levels of function call overhead.
3719 
3720   Level: advanced
3721 
3722 .seealso:   MatSetValuesBlocked()
3723 @*/
3724 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3725 {
3726   /* convert input arguments to C version */
3727   Mat        mat  = *matin;
3728   PetscInt   m    = *min, n = *nin;
3729   InsertMode addv = *addvin;
3730 
3731   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3732   const MatScalar *value;
3733   MatScalar       *barray     = baij->barray;
3734   PetscBool       roworiented = baij->roworiented;
3735   PetscErrorCode  ierr;
3736   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3737   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3738   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3739 
3740   PetscFunctionBegin;
3741   /* tasks normally handled by MatSetValuesBlocked() */
3742   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3743 #if defined(PETSC_USE_DEBUG)
3744   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3745   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3746 #endif
3747   if (mat->assembled) {
3748     mat->was_assembled = PETSC_TRUE;
3749     mat->assembled     = PETSC_FALSE;
3750   }
3751   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3752 
3753 
3754   if (!barray) {
3755     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3756     baij->barray = barray;
3757   }
3758 
3759   if (roworiented) stepval = (n-1)*bs;
3760   else stepval = (m-1)*bs;
3761 
3762   for (i=0; i<m; i++) {
3763     if (im[i] < 0) continue;
3764 #if defined(PETSC_USE_DEBUG)
3765     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3766 #endif
3767     if (im[i] >= rstart && im[i] < rend) {
3768       row = im[i] - rstart;
3769       for (j=0; j<n; j++) {
3770         /* if only a single block is passed in the current orientation, it can be used in place; otherwise copy it into barray */
3771         if ((roworiented) && (n == 1)) {
3772           barray = (MatScalar*)v + i*bs2;
3773         } else if ((!roworiented) && (m == 1)) {
3774           barray = (MatScalar*)v + j*bs2;
3775         } else { /* Here a copy is required */
3776           if (roworiented) {
3777             value = v + i*(stepval+bs)*bs + j*bs;
3778           } else {
3779             value = v + j*(stepval+bs)*bs + i*bs;
3780           }
3781           for (ii=0; ii<bs; ii++,value+=stepval) {
3782             for (jj=0; jj<bs; jj++) {
3783               *barray++ = *value++;
3784             }
3785           }
3786           barray -=bs2;
3787         }
3788 
3789         if (in[j] >= cstart && in[j] < cend) {
3790           col  = in[j] - cstart;
3791           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3792         } else if (in[j] < 0) continue;
3793 #if defined(PETSC_USE_DEBUG)
3794         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3795 #endif
3796         else {
3797           if (mat->was_assembled) {
3798             if (!baij->colmap) {
3799               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3800             }
3801 
3802 #if defined(PETSC_USE_DEBUG)
3803 #if defined(PETSC_USE_CTABLE)
3804             { PetscInt data;
3805               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3806               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3807             }
3808 #else
3809             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3810 #endif
3811 #endif
3812 #if defined(PETSC_USE_CTABLE)
3813             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3814             col  = (col - 1)/bs;
3815 #else
3816             col = (baij->colmap[in[j]] - 1)/bs;
3817 #endif
3818             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3819               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3820               col  =  in[j];
3821             }
3822           } else col = in[j];
3823           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3824         }
3825       }
3826     } else {
3827       if (!baij->donotstash) {
3828         if (roworiented) {
3829           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3830         } else {
3831           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3832         }
3833       }
3834     }
3835   }
3836 
3837   /* task normally handled by MatSetValuesBlocked() */
3838   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3839   PetscFunctionReturn(0);
3840 }
3841 
3842 #undef __FUNCT__
3843 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3844 /*@
3845      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
3846          in standard CSR format.
3847 
3848    Collective on MPI_Comm
3849 
3850    Input Parameters:
3851 +  comm - MPI communicator
3852 .  bs - the block size, only a block size of 1 is supported
3853 .  m - number of local rows (Cannot be PETSC_DECIDE)
3854 .  n - This value should be the same as the local size used in creating the
3855        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3856        calculated if N is given) For square matrices n is almost always m.
3857 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3858 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3859 .   i - row indices
3860 .   j - column indices
3861 -   a - matrix values
3862 
3863    Output Parameter:
3864 .   mat - the matrix
3865 
3866    Level: intermediate
3867 
3868    Notes:
3869        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3870      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3871      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3872 
3873      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3874      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3875      block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3876      with column-major ordering within blocks.
3877 
3878        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3879 
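   Example usage, a minimal sketch with bs=1 and two local rows on each process (the arrays are
   illustrative only; column indices are global):

.vb
   Mat         A;
   PetscInt    i[] = {0,1,3};            /* row 0 has 1 entry, row 1 has 2 entries */
   PetscInt    j[] = {0,0,1};            /* sorted global column indices for each row */
   PetscScalar a[] = {2.0,-1.0,2.0};
   MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve
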
3880 .keywords: matrix, aij, compressed row, sparse, parallel
3881 
3882 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3883           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3884 @*/
3885 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3886 {
3887   PetscErrorCode ierr;
3888 
3889   PetscFunctionBegin;
3890   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3891   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3892   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3893   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3894   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3895   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3896   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3897   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3898   PetscFunctionReturn(0);
3899 }
3900