xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision f692024ea6ceda132bc9ff30ca7a31e85cfbbcb2)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++){
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non scalable way since the
81    length of colmap equals the global matrix length.
82 */
83 #undef __FUNCT__
84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++){
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) - inlined insertion of a
   single scalar into the diagonal block A of an MPIBAIJ matrix.

   row/col are point (not block) indices local to A; addv is ADD_VALUES or
   INSERT_VALUES.  The macro locates the target block column within block row
   brow using a short binary search followed by a linear scan; if the block
   already exists the entry is updated in place, otherwise a new zeroed bs x bs
   block is inserted (honoring a->nonew: 1 = silently skip, -1 = error),
   shifting later blocks in the row up by one slot.

   NOTE: this macro captures many local variables of the caller
   (rp,ap,bap,aimax,ailen,aa,ai,aj,bs,bs2,brow,bcol,ridx,cidx,low,high,t,_i,
   ii,N,nrow,rmax,ierr) -- it may only be used inside MatSetValues_MPIBAIJ,
   which declares them.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
145 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) - inlined insertion of a
   single scalar into the off-diagonal block B of an MPIBAIJ matrix.

   Identical in structure to MatSetValues_SeqBAIJ_A_Private, but operates on
   B's arrays (bj,bi,ba,bimax,bilen) and honors b->nonew.  Like its sibling,
   it captures the caller's local variables and may only be expanded inside
   MatSetValues_MPIBAIJ.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
184 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Point-wise insertion/addition of an m x n logically
   dense block of values into a parallel BAIJ matrix.

   Rows owned by this process are routed to the diagonal block (baij->A) or
   the off-diagonal block (baij->B) using the inlined
   MatSetValues_SeqBAIJ_{A,B}_Private macros (which capture the locals
   declared below); values for rows owned elsewhere are stashed for
   communication during MatAssemblyBegin/End.  Negative row/column indices
   are silently ignored.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative rows are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;  /* negative columns are silently skipped */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* column belongs to the off-diagonal block B */
          if (mat->was_assembled) {
            /* after a prior assembly B uses compressed column numbering;
               translate the global column via colmap (built on demand) */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not yet present and new nonzeros are allowed: revert
                 the matrix to the unassembled (global-numbering) state */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;  /* add the point offset within the block */
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for communication at assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
271 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
/*
   MatSetValuesBlocked_MPIBAIJ - Block-wise insertion/addition of an m x n
   array of bs x bs blocks into a parallel BAIJ matrix.

   im[]/in[] are block (not point) indices.  Each bs x bs block is, when
   necessary, gathered from the caller's (row- or column-oriented) layout into
   the contiguous scratch buffer baij->barray before being handed to the
   sequential kernels; values for block rows owned by other processes are
   stashed into mat->bstash for communication during assembly.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized scratch block, cached on the matrix */
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block rows are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          /* gather the strided bs x bs block into contiguous storage */
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj]  = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;  /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column falls in the diagonal block A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;  /* negative block columns are skipped */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* block column belongs to the off-diagonal block B */
          if (mat->was_assembled) {
            /* translate global block column to B's compressed numbering */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* unknown column and new nonzeros allowed: revert to the
                 unassembled (global-numbering) state */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned elsewhere: stash for communication at assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
376 
/* Multiplicative hashing: HASH_KEY is the fractional part of the golden
   ratio; HASH(size,key,tmp) maps key into [0,size) by taking the fractional
   part of key*HASH_KEY.  tmp must be a PetscReal lvalue supplied by the
   caller (the macro evaluates key twice via tmp, so key must be side-effect
   free). */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Point-wise insertion/addition using the
   precomputed hash table (baij->ht/baij->hd) that maps each existing
   (block row, block column) pair directly to its storage address.

   Requires the hash table built by MatCreateHashTable_MPIBAIJ_Private; the
   nonzero pattern is fixed, so an entry not found in the table is an error.
   Off-process rows are stashed as in MatSetValues_MPIBAIJ.  In debug builds
   the probe counts are accumulated into baij->ht_total_ct/ht_insert_ct.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* look up block (row/bs, col/bs) in the hash table; keys are stored
           shifted by +1 so that 0 means "empty slot" */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        /* linear probe forward from h1, wrapping around once; count probes */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        /* linear probe forward from h1, wrapping around once */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned by another process: stash for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
459 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Block-wise insertion/addition using the
   precomputed hash table (baij->ht/baij->hd).

   im[]/in[] are block indices.  Each existing block's storage address is
   found by hashing (row*Nbs+col+1) and linear probing; the bs x bs values
   are then copied directly from v (column-major within each stored block,
   hence the jj+=bs stride in the row-oriented branch).  The nonzero pattern
   is fixed -- a block absent from the table is an error.  Off-process block
   rows are stashed into mat->bstash.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;  /* start of the i-th block row within v (row-oriented) */
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table; keys are stored shifted by +1 so that
           0 means "empty slot" */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        /* linear probe forward from h1, wrapping around once; count probes */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        /* linear probe forward from h1, wrapping around once */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];  /* address of the stored bs x bs block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned elsewhere: stash for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
576 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves an m x n array of point values from the
   locally owned rows of a parallel BAIJ matrix into v (row-major).

   Columns in the diagonal range are read from baij->A; other columns are
   translated through colmap (built on demand) and read from baij->B.  A
   column whose block is not stored locally yields 0.0.  Rows owned by other
   processes are not supported and raise an error; negative indices are
   silently skipped (their v entries are left untouched).
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column in the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate via colmap (0-entry means absent) */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* cross-check against garray: a stale/absent mapping reads as 0.0 */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
621 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Computes the Frobenius, 1-, or infinity-norm of a
   parallel BAIJ matrix.

   Serial case (baij->size == 1) delegates to MatNorm on the diagonal block.
   In parallel:
     NORM_FROBENIUS - local sum of squares over the raw entries of A and B,
                      summed with MPI_Allreduce, then square-rooted.
     NORM_1         - per-global-column absolute sums walked block-by-block
                      (garray maps B's block columns to global), reduced with
                      MPIU_SUM, then maximized over columns.
     NORM_INFINITY  - per-point-row absolute sums accumulated one block row
                      at a time, local maximum reduced with MPIU_MAX.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over every stored scalar of A, then of B */
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): uses rstartbs as the diagonal block's column offset --
         consistent with the square row/column decomposition assumed here;
         confirm for non-square layouts */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* blocks are stored column-major, so the inner 'row' loop walks one
         block column contiguously */
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;  /* garray maps to the global block column */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      /* accumulate the bs point-row sums of block row j over A and B */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
723 
/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entried need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.

  Each stored block (block row i, block column col) is entered under the key
  row*Nbs+col+1 (shifted by +1 so that 0 marks an empty slot).  Collisions are
  resolved by linear probing; baij->hd[] holds the address of each block's
  values so the _HT setter routines can write directly.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);  /* already built */

  /* table size = factor * (number of stored blocks) */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* linear probing with wrap-around until an empty slot is found */
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];  /* global block column via the garray mapping */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
819 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
/*
   Start matrix assembly: verify that all ranks agree on the insert mode
   and launch the sends that ship stashed off-process entries (both the
   point stash and the block stash) to their owning ranks.  Collective.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* no off-process entries were stashed, so nothing needs communicating */
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  /* start the asynchronous sends of both stashes; MatAssemblyEnd receives them */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
847 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   Finish assembly: receive and insert the stashed off-process entries,
   assemble the local A and B submatrices, handle the case where some rank
   disassembled (B's column map must then be rebuilt everywhere), set up
   the matrix-vector scatter on first assembly, and optionally install the
   hash-table fast paths for subsequent MatSetValues calls.  Collective.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each message holds (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* val is block data, hence the i*bs2 offset into the values */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: zero iff some rank was never assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the scatter used in matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* user requested hash-table insertion: build the table now that the
     nonzero pattern is final, and switch the setvalues kernels */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* MatGetRow work space is invalid after assembly; free it (rowindices
     is freed by the same PetscFree2, allocated together in MatGetRow) */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
948 
949 #undef __FUNCT__
950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
952 {
953   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
954   PetscErrorCode    ierr;
955   PetscMPIInt       size = baij->size,rank = baij->rank;
956   PetscInt          bs = mat->rmap->bs;
957   PetscBool         iascii,isdraw;
958   PetscViewer       sviewer;
959   PetscViewerFormat format;
960 
961   PetscFunctionBegin;
962   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
963   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
964   if (iascii) {
965     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
966     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
967       MatInfo info;
968       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
969       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
970       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
971       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
972              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
973       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
974       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
979       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
980       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
981       PetscFunctionReturn(0);
982     } else if (format == PETSC_VIEWER_ASCII_INFO) {
983       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
984       PetscFunctionReturn(0);
985     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
986       PetscFunctionReturn(0);
987     }
988   }
989 
990   if (isdraw) {
991     PetscDraw       draw;
992     PetscBool  isnull;
993     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
994     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
995   }
996 
997   if (size == 1) {
998     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
999     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1000   } else {
1001     /* assemble the entire matrix onto first processor. */
1002     Mat         A;
1003     Mat_SeqBAIJ *Aloc;
1004     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1005     MatScalar   *a;
1006 
1007     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1008     /* Perhaps this should be the type of mat? */
1009     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1010     if (!rank) {
1011       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1012     } else {
1013       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1014     }
1015     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1016     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1017     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1018     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1019 
1020     /* copy over the A part */
1021     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1024 
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = (baij->cstartbs+aj[j])*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     /* copy over the B part */
1037     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1038     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1039     for (i=0; i<mbs; i++) {
1040       rvals[0] = bs*(baij->rstartbs + i);
1041       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1042       for (j=ai[i]; j<ai[i+1]; j++) {
1043         col = baij->garray[aj[j]]*bs;
1044         for (k=0; k<bs; k++) {
1045           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1046           col++; a += bs;
1047         }
1048       }
1049     }
1050     ierr = PetscFree(rvals);CHKERRQ(ierr);
1051     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     /*
1054        Everyone has to call to draw the matrix since the graphics waits are
1055        synchronized across all processors that share the PetscDraw object
1056     */
1057     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1058     if (!rank) {
1059       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1060     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1061       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1062       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1063     }
1064     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1065     ierr = MatDestroy(&A);CHKERRQ(ierr);
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1072 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1073 {
1074   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1075   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1076   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1077   PetscErrorCode ierr;
1078   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1079   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1080   int            fd;
1081   PetscScalar    *column_values;
1082   FILE           *file;
1083   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1084   PetscInt       message_count,flowcontrolcount;
1085 
1086   PetscFunctionBegin;
1087   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1088   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1089   nz   = bs2*(A->nz + B->nz);
1090   rlen = mat->rmap->n;
1091   if (!rank) {
1092     header[0] = MAT_FILE_CLASSID;
1093     header[1] = mat->rmap->N;
1094     header[2] = mat->cmap->N;
1095     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1096     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1097     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1098     /* get largest number of rows any processor has */
1099     range = mat->rmap->range;
1100     for (i=1; i<size; i++) {
1101       rlen = PetscMax(rlen,range[i+1] - range[i]);
1102     }
1103   } else {
1104     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1105   }
1106 
1107   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1108   /* compute lengths of each row  */
1109   for (i=0; i<a->mbs; i++) {
1110     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1111   }
1112   /* store the row lengths to the file */
1113   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1114   if (!rank) {
1115     MPI_Status status;
1116     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1117     rlen  = (range[1] - range[0])/bs;
1118     for (i=0; i<rlen; i++) {
1119       for (j=0; j<bs; j++) {
1120         row_lens[i*bs+j] = bs*crow_lens[i];
1121       }
1122     }
1123     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1124     for (i=1; i<size; i++) {
1125       rlen = (range[i+1] - range[i])/bs;
1126       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1127       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1128       for (k=0; k<rlen; k++) {
1129 	for (j=0; j<bs; j++) {
1130 	  row_lens[k*bs+j] = bs*crow_lens[k];
1131 	}
1132       }
1133       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1134     }
1135     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1136     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1137   } else {
1138     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1139     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1140     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1141   }
1142   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1143 
1144   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1145      information needed to make it for each row from a block row. This does require more communication but still not more than
1146      the communication needed for the nonzero values  */
1147   nzmax = nz; /*  space a largest processor needs */
1148   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1149   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1150   cnt  = 0;
1151   for (i=0; i<a->mbs; i++) {
1152     pcnt = cnt;
1153     for (j=B->i[i]; j<B->i[i+1]; j++) {
1154       if ( (col = garray[B->j[j]]) > cstart) break;
1155       for (l=0; l<bs; l++) {
1156 	column_indices[cnt++] = bs*col+l;
1157       }
1158     }
1159     for (k=A->i[i]; k<A->i[i+1]; k++) {
1160       for (l=0; l<bs; l++) {
1161         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1162       }
1163     }
1164     for (; j<B->i[i+1]; j++) {
1165       for (l=0; l<bs; l++) {
1166         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1167       }
1168     }
1169     len = cnt - pcnt;
1170     for (k=1; k<bs; k++) {
1171       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1172       cnt += len;
1173     }
1174   }
1175   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1176 
1177   /* store the columns to the file */
1178   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1179   if (!rank) {
1180     MPI_Status status;
1181     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1182     for (i=1; i<size; i++) {
1183       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1184       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1185       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1186       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1187     }
1188     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1189   } else {
1190     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1191     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1192     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1193     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1194   }
1195   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1196 
1197   /* load up the numerical values */
1198   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1199   cnt = 0;
1200   for (i=0; i<a->mbs; i++) {
1201     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1202     for (j=B->i[i]; j<B->i[i+1]; j++) {
1203       if ( garray[B->j[j]] > cstart) break;
1204       for (l=0; l<bs; l++) {
1205         for (ll=0; ll<bs; ll++) {
1206 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1207         }
1208       }
1209       cnt += bs;
1210     }
1211     for (k=A->i[i]; k<A->i[i+1]; k++) {
1212       for (l=0; l<bs; l++) {
1213         for (ll=0; ll<bs; ll++) {
1214           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1215         }
1216       }
1217       cnt += bs;
1218     }
1219     for (; j<B->i[i+1]; j++) {
1220       for (l=0; l<bs; l++) {
1221         for (ll=0; ll<bs; ll++) {
1222 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1223         }
1224       }
1225       cnt += bs;
1226     }
1227     cnt += (bs-1)*rlen;
1228   }
1229   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1230 
1231   /* store the column values to the file */
1232   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1233   if (!rank) {
1234     MPI_Status status;
1235     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1236     for (i=1; i<size; i++) {
1237       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1238       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1239       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1240       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1241     }
1242     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1243   } else {
1244     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1245     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1246     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(column_values);CHKERRQ(ierr);
1250 
1251   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1252   if (file) {
1253     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatView_MPIBAIJ"
1260 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1261 {
1262   PetscErrorCode ierr;
1263   PetscBool      iascii,isdraw,issocket,isbinary;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1267   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1268   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1269   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1270   if (iascii || isdraw || issocket) {
1271     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1272   } else if (isbinary) {
1273     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1274   } else {
1275     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   Free all storage owned by an MPIBAIJ matrix: the value stashes, the
   local diagonal (A) and off-diagonal (B) submatrices, the column map,
   the communication objects used in matrix-vector products, the
   MatGetRow/hash-table work arrays, and finally the data structure
   itself; then detach the type name and the composed methods that were
   registered when the matrix was created.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on the build configuration */
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* reset the type name and unregister the composed methods */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatMult_MPIBAIJ"
1324 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1325 {
1326   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1327   PetscErrorCode ierr;
1328   PetscInt       nt;
1329 
1330   PetscFunctionBegin;
1331   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1332   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1333   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1334   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1335   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1337   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1338   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
/*
   zz = yy + A*xx.  As in MatMult_MPIBAIJ(), the scatter of the off-process
   part of xx into a->lvec is started first so that it overlaps with the
   diagonal-block multiply-add.
*/
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* diagonal-block contribution computed while the scatter is in flight */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* accumulate the off-diagonal block contribution into zz */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1356 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
/*
   yy = A'*xx.  The off-diagonal block's transpose product is computed into
   a->lvec and scattered back (reversed, with addition) onto the owning
   ranks.  The ordering of the local transpose product relative to the
   scatter depends on whether the scatter's begin/end are merged: when not
   merged, the local product must land in yy before VecScatterEnd() adds
   the remote contributions; when merged, the values arrive already in
   Begin(), so the local product must be completed first.
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1387 
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
/*
   zz = yy + A'*xx.  The off-diagonal transpose product is computed into
   a->lvec and sent (reverse scatter, added) toward its owners while the
   local transpose multiply-add runs, overlapping communication with
   computation.
*/
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1408 
1409 /*
1410   This only works correctly for square matrices where the subblock A->A is the
1411    diagonal block
1412 */
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1415 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1416 {
1417   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1422   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatScale_MPIBAIJ"
1428 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1429 {
1430   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1431   PetscErrorCode ierr;
1432 
1433   PetscFunctionBegin;
1434   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1435   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   Return one locally owned point row of the matrix with global column
   indices in increasing order.  The row is assembled by merging the row of
   the diagonal block A (columns in [cstart*bs, cend*bs)) with the row of
   the off-diagonal block B, whose columns are translated to global indices
   through garray; B's entries split into those left of and those right of
   the diagonal block.  The merged copies are placed in work arrays
   (rowvalues/rowindices) that are allocated lazily, sized for the longest
   local row, reused between calls, and freed at assembly/destroy time.
   Must be paired with MatRestoreRow_MPIBAIJ().
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];   /* blocks in block row i */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;   /* point row index local to this process */

  /* request only the pieces (values and/or indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;   /* number of B entries left of the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* the split point was already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested: find the split point now */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1518 
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1521 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1522 {
1523   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1524 
1525   PetscFunctionBegin;
1526   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1527   baij->getrowactive = PETSC_FALSE;
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1533 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1534 {
1535   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1536   PetscErrorCode ierr;
1537 
1538   PetscFunctionBegin;
1539   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1540   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1546 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1547 {
1548   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1549   Mat            A = a->A,B = a->B;
1550   PetscErrorCode ierr;
1551   PetscReal      isend[5],irecv[5];
1552 
1553   PetscFunctionBegin;
1554   info->block_size     = (PetscReal)matin->rmap->bs;
1555   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1556   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1557   isend[3] = info->memory;  isend[4] = info->mallocs;
1558   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1559   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1560   isend[3] += info->memory;  isend[4] += info->mallocs;
1561   if (flag == MAT_LOCAL) {
1562     info->nz_used      = isend[0];
1563     info->nz_allocated = isend[1];
1564     info->nz_unneeded  = isend[2];
1565     info->memory       = isend[3];
1566     info->mallocs      = isend[4];
1567   } else if (flag == MAT_GLOBAL_MAX) {
1568     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1569     info->nz_used      = irecv[0];
1570     info->nz_allocated = irecv[1];
1571     info->nz_unneeded  = irecv[2];
1572     info->memory       = irecv[3];
1573     info->mallocs      = irecv[4];
1574   } else if (flag == MAT_GLOBAL_SUM) {
1575     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1576     info->nz_used      = irecv[0];
1577     info->nz_allocated = irecv[1];
1578     info->nz_unneeded  = irecv[2];
1579     info->memory       = irecv[3];
1580     info->mallocs      = irecv[4];
1581   } else {
1582     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1583   }
1584   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1585   info->fill_ratio_needed = 0;
1586   info->factor_mallocs    = 0;
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1592 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1593 {
1594   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1595   PetscErrorCode ierr;
1596 
1597   PetscFunctionBegin;
1598   switch (op) {
1599   case MAT_NEW_NONZERO_LOCATIONS:
1600   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1601   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1602   case MAT_KEEP_NONZERO_PATTERN:
1603   case MAT_NEW_NONZERO_LOCATION_ERR:
1604     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1605     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1606     break;
1607   case MAT_ROW_ORIENTED:
1608     a->roworiented = flg;
1609     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1610     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1611     break;
1612   case MAT_NEW_DIAGONALS:
1613     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1614     break;
1615   case MAT_IGNORE_OFF_PROC_ENTRIES:
1616     a->donotstash = flg;
1617     break;
1618   case MAT_USE_HASH_TABLE:
1619     a->ht_flag = flg;
1620     break;
1621   case MAT_SYMMETRIC:
1622   case MAT_STRUCTURALLY_SYMMETRIC:
1623   case MAT_HERMITIAN:
1624   case MAT_SYMMETRY_ETERNAL:
1625     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1626     break;
1627   default:
1628     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1629   }
1630   PetscFunctionReturn(0);
1631 }
1632 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
/*
   Forms the transpose of A.  For MAT_INITIAL_MATRIX (or in-place reuse) a
   new matrix B is created with no preallocation and filled by inserting the
   stored blocks at their transposed global locations; assembly routes the
   entries to the owning processes.  In-place transposition
   (MAT_REUSE_MATRIX with *matout == A) is only allowed for square matrices
   and is completed with MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  /* rvals holds the bs consecutive global row indices of block-row i of A,
     which become column indices of B */
  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* global starting column of block j; diagonal-part columns are local,
         so offset by the first owned block column (cstartbs) */
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* each group of bs consecutive values in a[] is inserted as one row
           of B (columns rvals); a advances by bs per inserted row */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* garray maps local off-diagonal block columns to global block columns */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place: move B's guts into A, destroying B's shell */
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1700 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1701 {
1702   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1703   Mat            a = baij->A,b = baij->B;
1704   PetscErrorCode ierr;
1705   PetscInt       s1,s2,s3;
1706 
1707   PetscFunctionBegin;
1708   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1709   if (rr) {
1710     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1711     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1712     /* Overlap communication with computation. */
1713     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1714   }
1715   if (ll) {
1716     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1717     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1718     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1719   }
1720   /* scale  the diagonal block */
1721   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1722 
1723   if (rr) {
1724     /* Do a scatter end and then right scale the off-diagonal block */
1725     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1726     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1727   }
1728 
1729   PetscFunctionReturn(0);
1730 }
1731 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   Zeros the (global) rows listed in rows[], optionally placing diag on the
   diagonal, and fixes the right-hand side b to diag*x for the zeroed rows.

   Rows may be specified by any process: the indices are first routed to
   their owning processes with a hand-rolled point-to-point rendezvous
   (counts, Irecv/Isend, Waitany), after which each process zeroes the rows
   it received in its local diagonal (A) and off-diagonal (B) blocks.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
  PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = number of rows destined for process j; nprocs[2*j+1] = 1 iff a message goes to j */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* j persists across iterations: the ownership search is restarted only
       when the row indices are not ascending (cheap for sorted input) */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack the row indices by destination process (this advances starts[]) */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* recompute the offsets consumed above, then fire one Isend per destination */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
  count  = nrecvs;
  slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* gather all received (global) row indices as local indices in lrows[] */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: must insert the diagonal entries one by one */
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }

  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1903 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1904 {
1905   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1906   PetscErrorCode ierr;
1907 
1908   PetscFunctionBegin;
1909   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatEqual_MPIBAIJ"
1917 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1918 {
1919   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1920   Mat            a,b,c,d;
1921   PetscBool      flg;
1922   PetscErrorCode ierr;
1923 
1924   PetscFunctionBegin;
1925   a = matA->A; b = matA->B;
1926   c = matB->A; d = matB->B;
1927 
1928   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1929   if (flg) {
1930     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1931   }
1932   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 #undef __FUNCT__
1937 #define __FUNCT__ "MatCopy_MPIBAIJ"
1938 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1939 {
1940   PetscErrorCode ierr;
1941   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1942   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1943 
1944   PetscFunctionBegin;
1945   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1946   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1947     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1948   } else {
1949     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1950     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1951   }
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 #undef __FUNCT__
1956 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1957 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1958 {
1959   PetscErrorCode ierr;
1960 
1961   PetscFunctionBegin;
1962   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1968 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1969 {
1970   PetscErrorCode ierr;
1971   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1972   PetscBLASInt   bnz,one=1;
1973   Mat_SeqBAIJ    *x,*y;
1974 
1975   PetscFunctionBegin;
1976   if (str == SAME_NONZERO_PATTERN) {
1977     PetscScalar alpha = a;
1978     x = (Mat_SeqBAIJ *)xx->A->data;
1979     y = (Mat_SeqBAIJ *)yy->A->data;
1980     bnz = PetscBLASIntCast(x->nz);
1981     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1982     x = (Mat_SeqBAIJ *)xx->B->data;
1983     y = (Mat_SeqBAIJ *)yy->B->data;
1984     bnz = PetscBLASIntCast(x->nz);
1985     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1986   } else {
1987     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 #undef __FUNCT__
1993 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1994 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1995 {
1996   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1997   PetscInt rbs,cbs;
1998   PetscErrorCode ierr;
1999 
2000   PetscFunctionBegin;
2001   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2002   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2003   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2004   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2005   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2006   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2012 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2013 {
2014   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2015   PetscErrorCode ierr;
2016 
2017   PetscFunctionBegin;
2018   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2019   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2025 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2026 {
2027   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2028   PetscErrorCode ierr;
2029 
2030   PetscFunctionBegin;
2031   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2032   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 #undef __FUNCT__
2037 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2038 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2039 {
2040   PetscErrorCode ierr;
2041   IS             iscol_local;
2042   PetscInt       csize;
2043 
2044   PetscFunctionBegin;
2045   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2046   if (call == MAT_REUSE_MATRIX) {
2047     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2048     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2049   } else {
2050     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2051   }
2052   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2053   if (call == MAT_INITIAL_MATRIX) {
2054     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2055     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2056   }
2057   PetscFunctionReturn(0);
2058 }
2059 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
*/
/*
   iscol must be the already-gathered (global) column IS; csize is the
   requested number of locally owned columns of the result (PETSC_DECIDE
   lets this routine choose).  On MAT_INITIAL_MATRIX the intermediate
   sequential submatrix is stashed on the result (key "SubMatrix") so that
   MAT_REUSE_MATRIX calls can refill it in place.
*/
PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqBAIJ    *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* refill the stashed sequential submatrix rather than building a new one */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  m    = m/bs;   /* convert scalar sizes to block counts */
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
	nlocal = m;
      } else {
        /* spread the columns as evenly as possible over the processes */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    /* prefix-sum the local column counts to get this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;   /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    /* NOTE(review): ml is in scalar rows while m was divided by bs above --
       for bs>1 this check looks inconsistent; confirm against the AIJ version */
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* insert the local block rows of the sequential submatrix into M */
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatPermute_MPIBAIJ"
2192 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2193 {
2194   MPI_Comm       comm,pcomm;
2195   PetscInt       first,local_size,nrows;
2196   const PetscInt *rows;
2197   PetscMPIInt    size;
2198   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2203   /* make a collective version of 'rowp' */
2204   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2205   if (pcomm==comm) {
2206     crowp = rowp;
2207   } else {
2208     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2209     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2210     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2211     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2212   }
2213   /* collect the global row permutation and invert it */
2214   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2215   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2216   if (pcomm!=comm) {
2217     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2218   }
2219   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2220   /* get the local target indices */
2221   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2222   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2223   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2224   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2225   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2226   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2227   /* the column permutation is so much easier;
2228      make a local version of 'colp' and invert it */
2229   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2230   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2231   if (size==1) {
2232     lcolp = colp;
2233   } else {
2234     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2235     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2236     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2237   }
2238   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2239   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2240   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2241   if (size>1) {
2242     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2243     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2244   }
2245   /* now we just get the submatrix */
2246   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2247   /* clean up */
2248   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2249   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2255 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2256 {
2257   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2258   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2259 
2260   PetscFunctionBegin;
2261   if (nghosts) { *nghosts = B->nbs;}
2262   if (ghosts) {*ghosts = baij->garray;}
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
/*
    This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
*/
2273 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2274 {
2275   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2276   PetscErrorCode        ierr;
2277   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2278   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2279   const PetscInt        *is;
2280   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2281   PetscInt              *rowhit,M,cstart,cend,colb;
2282   PetscInt              *columnsforrow,l;
2283   IS                    *isa;
2284   PetscBool              done,flg;
2285   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2286   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2287 
2288   PetscFunctionBegin;
2289   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2290   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2291 
2292   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2293   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2294   M                = mat->rmap->n/bs;
2295   cstart           = mat->cmap->rstart/bs;
2296   cend             = mat->cmap->rend/bs;
2297   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2298   c->N             = mat->cmap->N/bs;
2299   c->m             = mat->rmap->n/bs;
2300   c->rstart        = mat->rmap->rstart/bs;
2301 
2302   c->ncolors       = nis;
2303   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2304   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2305   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2306   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2307   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2308   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2309 
2310   /* Allow access to data structures of local part of matrix */
2311   if (!baij->colmap) {
2312     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2313   }
2314   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2315   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2316 
2317   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2318   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2319 
2320   for (i=0; i<nis; i++) {
2321     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2322     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2323     c->ncolumns[i] = n;
2324     if (n) {
2325       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2326       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2327       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2328     } else {
2329       c->columns[i]  = 0;
2330     }
2331 
2332     if (ctype == IS_COLORING_GLOBAL){
2333       /* Determine the total (parallel) number of columns of this color */
2334       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2335       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2336 
2337       nn   = PetscMPIIntCast(n);
2338       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2339       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2340       if (!nctot) {
2341         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2342       }
2343 
2344       disp[0] = 0;
2345       for (j=1; j<size; j++) {
2346         disp[j] = disp[j-1] + ncolsonproc[j-1];
2347       }
2348 
2349       /* Get complete list of columns for color on each processor */
2350       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2351       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2352       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2353     } else if (ctype == IS_COLORING_GHOSTED){
2354       /* Determine local number of columns of this color on this process, including ghost points */
2355       nctot = n;
2356       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2357       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2358     } else {
2359       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2360     }
2361 
2362     /*
2363        Mark all rows affect by these columns
2364     */
2365     /* Temporary option to allow for debugging/testing */
2366     flg  = PETSC_FALSE;
2367     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2368     if (!flg) {/*-----------------------------------------------------------------------------*/
2369       /* crude, fast version */
2370       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2371       /* loop over columns*/
2372       for (j=0; j<nctot; j++) {
2373         if (ctype == IS_COLORING_GHOSTED) {
2374           col = ltog[cols[j]];
2375         } else {
2376           col  = cols[j];
2377         }
2378         if (col >= cstart && col < cend) {
2379           /* column is in diagonal block of matrix */
2380           rows = A_cj + A_ci[col-cstart];
2381           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2382         } else {
2383 #if defined (PETSC_USE_CTABLE)
2384           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2385 	  colb --;
2386 #else
2387           colb = baij->colmap[col] - 1;
2388 #endif
2389           if (colb == -1) {
2390             m = 0;
2391           } else {
2392             colb = colb/bs;
2393             rows = B_cj + B_ci[colb];
2394             m    = B_ci[colb+1] - B_ci[colb];
2395           }
2396         }
2397         /* loop over columns marking them in rowhit */
2398         for (k=0; k<m; k++) {
2399           rowhit[*rows++] = col + 1;
2400         }
2401       }
2402 
2403       /* count the number of hits */
2404       nrows = 0;
2405       for (j=0; j<M; j++) {
2406         if (rowhit[j]) nrows++;
2407       }
2408       c->nrows[i]         = nrows;
2409       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2410       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2411       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2412       nrows = 0;
2413       for (j=0; j<M; j++) {
2414         if (rowhit[j]) {
2415           c->rows[i][nrows]           = j;
2416           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2417           nrows++;
2418         }
2419       }
2420     } else {/*-------------------------------------------------------------------------------*/
2421       /* slow version, using rowhit as a linked list */
2422       PetscInt currentcol,fm,mfm;
2423       rowhit[M] = M;
2424       nrows     = 0;
2425       /* loop over columns*/
2426       for (j=0; j<nctot; j++) {
2427         if (ctype == IS_COLORING_GHOSTED) {
2428           col = ltog[cols[j]];
2429         } else {
2430           col  = cols[j];
2431         }
2432         if (col >= cstart && col < cend) {
2433           /* column is in diagonal block of matrix */
2434           rows = A_cj + A_ci[col-cstart];
2435           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2436         } else {
2437 #if defined (PETSC_USE_CTABLE)
2438           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2439           colb --;
2440 #else
2441           colb = baij->colmap[col] - 1;
2442 #endif
2443           if (colb == -1) {
2444             m = 0;
2445           } else {
2446             colb = colb/bs;
2447             rows = B_cj + B_ci[colb];
2448             m    = B_ci[colb+1] - B_ci[colb];
2449           }
2450         }
2451 
2452         /* loop over columns marking them in rowhit */
2453         fm    = M; /* fm points to first entry in linked list */
2454         for (k=0; k<m; k++) {
2455           currentcol = *rows++;
2456 	  /* is it already in the list? */
2457           do {
2458             mfm  = fm;
2459             fm   = rowhit[fm];
2460           } while (fm < currentcol);
2461           /* not in list so add it */
2462           if (fm != currentcol) {
2463             nrows++;
2464             columnsforrow[currentcol] = col;
2465             /* next three lines insert new entry into linked list */
2466             rowhit[mfm]               = currentcol;
2467             rowhit[currentcol]        = fm;
2468             fm                        = currentcol;
2469             /* fm points to present position in list since we know the columns are sorted */
2470           } else {
2471             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2472           }
2473         }
2474       }
2475       c->nrows[i]         = nrows;
2476       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2477       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2478       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2479       /* now store the linked list of rows into c->rows[i] */
2480       nrows = 0;
2481       fm    = rowhit[M];
2482       do {
2483         c->rows[i][nrows]            = fm;
2484         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2485         fm                           = rowhit[fm];
2486       } while (fm < M);
2487     } /* ---------------------------------------------------------------------------------------*/
2488     ierr = PetscFree(cols);CHKERRQ(ierr);
2489   }
2490 
2491   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2492   /*
2493        vscale will contain the "diagonal" on processor scalings followed by the off processor
2494   */
2495   if (ctype == IS_COLORING_GLOBAL) {
2496     PetscInt *garray;
2497     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2498     for (i=0; i<baij->B->cmap->n/bs; i++) {
2499       for (j=0; j<bs; j++) {
2500         garray[i*bs+j] = bs*baij->garray[i]+j;
2501       }
2502     }
2503     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2504     ierr = PetscFree(garray);CHKERRQ(ierr);
2505     CHKMEMQ;
2506     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2507     for (k=0; k<c->ncolors; k++) {
2508       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2509       for (l=0; l<c->nrows[k]; l++) {
2510         col = c->columnsforrow[k][l];
2511         if (col >= cstart && col < cend) {
2512           /* column is in diagonal block of matrix */
2513           colb = col - cstart;
2514         } else {
2515           /* column  is in "off-processor" part */
2516 #if defined (PETSC_USE_CTABLE)
2517           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2518           colb --;
2519 #else
2520           colb = baij->colmap[col] - 1;
2521 #endif
2522           colb = colb/bs;
2523           colb += cend - cstart;
2524         }
2525         c->vscaleforrow[k][l] = colb;
2526       }
2527     }
2528   } else if (ctype == IS_COLORING_GHOSTED) {
2529     /* Get gtol mapping */
2530     PetscInt N = mat->cmap->N, *gtol;
2531     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2532     for (i=0; i<N; i++) gtol[i] = -1;
2533     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2534 
2535     c->vscale = 0; /* will be created in MatFDColoringApply() */
2536     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2537     for (k=0; k<c->ncolors; k++) {
2538       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2539       for (l=0; l<c->nrows[k]; l++) {
2540         col = c->columnsforrow[k][l];      /* global column index */
2541         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2542       }
2543     }
2544     ierr = PetscFree(gtol);CHKERRQ(ierr);
2545   }
2546   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2547 
2548   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2549   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2550   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2551   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2552     CHKMEMQ;
2553   PetscFunctionReturn(0);
2554 }
2555 
#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
/*
   Builds, on every process, a sequential AIJ matrix (one entry per block)
   holding the nonzero BLOCK structure of the entire parallel matrix.
   Only the structure is assembled; the numerical values are never filled in.
   Collective: performs two MPI_Allgatherv calls (row lengths, then column
   indices).
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    /* block-row length = diagonal-part row length + off-diagonal-part row length */
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  /* recvcounts and displs share one allocation; displs occupies the second half */
  ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
  displs     = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#else
  ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b = (Mat_SeqAIJ *)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  /* write directly into B's index array at this rank's block-row offset */
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  /* merge each row so global column indices come out sorted:
     off-diagonal entries left of the diagonal block, then the diagonal
     block, then the remaining off-diagonal entries */
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0]  = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information known about A to B */
  if (A->symmetric){
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2676 
2677 #undef __FUNCT__
2678 #define __FUNCT__ "MatSOR_MPIBAIJ"
2679 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2680 {
2681   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2682   PetscErrorCode ierr;
2683   Vec            bb1 = 0;
2684 
2685   PetscFunctionBegin;
2686   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2687     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2688   }
2689 
2690   if (flag == SOR_APPLY_UPPER) {
2691     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2692     PetscFunctionReturn(0);
2693   }
2694 
2695   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2696     if (flag & SOR_ZERO_INITIAL_GUESS) {
2697       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2698       its--;
2699     }
2700 
2701     while (its--) {
2702       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2703       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2704 
2705       /* update rhs: bb1 = bb - B*x */
2706       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2707       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2708 
2709       /* local sweep */
2710       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2711     }
2712   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2713     if (flag & SOR_ZERO_INITIAL_GUESS) {
2714       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2715       its--;
2716     }
2717     while (its--) {
2718       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2719       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720 
2721       /* update rhs: bb1 = bb - B*x */
2722       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2723       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2724 
2725       /* local sweep */
2726       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2727     }
2728   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2729     if (flag & SOR_ZERO_INITIAL_GUESS) {
2730       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2731       its--;
2732     }
2733     while (its--) {
2734       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2735       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2736 
2737       /* update rhs: bb1 = bb - B*x */
2738       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2739       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2740 
2741       /* local sweep */
2742       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2743     }
2744   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2745 
2746   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749 
2750 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2751 
2752 #undef __FUNCT__
2753 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2754 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values)
2755 {
2756   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2757   PetscErrorCode ierr;
2758 
2759   PetscFunctionBegin;
2760   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 
/* -------------------------------------------------------------------*/
/*
   Operation table for the MPIBAIJ matrix type.  The position of each entry
   corresponds to a slot in struct _MatOps (the index comments give the slot
   number); a 0 marks an operation not implemented for this type.
*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
/* 4*/ MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIBAIJ,
       MatTranspose_MPIBAIJ,
/*15*/ MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
/*20*/ MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
/*24*/ MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIBAIJ,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIBAIJ,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       MatCopy_MPIBAIJ,
/*44*/ 0,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIBAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIBAIJ,
       0,
       MatSetUnfactored_MPIBAIJ,
       MatPermute_MPIBAIJ,
       MatSetValuesBlocked_MPIBAIJ,
/*59*/ MatGetSubMatrix_MPIBAIJ,
       MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIBAIJ,
       0,
       0,
       0,
       0,
/*74*/ 0,
       MatFDColoringApply_BAIJ,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_MPIBAIJ,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       0,
       0,
       0,
/*94*/ 0,
       0,
       0,
       0,
       0,
/*99*/ 0,
       0,
       0,
       0,
       0,
/*104*/0,
       MatRealPart_MPIBAIJ,
       MatImaginaryPart_MPIBAIJ,
       0,
       0,
/*109*/0,
       0,
       0,
       0,
       0,
/*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
       0,
       MatGetGhosts_MPIBAIJ,
       0,
       0,
/*119*/0,
       0,
       0,
       0,
       0,
/*124*/0,
       0,
       MatInvertBlockDiagonal_MPIBAIJ
};
2895 
2896 EXTERN_C_BEGIN
2897 #undef __FUNCT__
2898 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2899 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2900 {
2901   PetscFunctionBegin;
2902   *a = ((Mat_MPIBAIJ *)A->data)->A;
2903   PetscFunctionReturn(0);
2904 }
2905 EXTERN_C_END
2906 
2907 EXTERN_C_BEGIN
2908 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2909 EXTERN_C_END
2910 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
/*
   Type-specific implementation of MatMPIBAIJSetPreallocationCSR():
   preallocates B from local block-CSR data (ii,jj) -- all indices are in
   BLOCK units and ii[0] must be 0 -- then inserts the optional values V
   row by row and assembles.  When V is null the structure is inserted
   with explicit zeros so the nonzero pattern is still established.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ=0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;

  if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* local size and ownership range, converted to block units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
  /* split each row's count into diagonal-block (d_nnz) and off-block (o_nnz)
     parts; relies on the column indices of each row being sorted */
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    JJ  = jj + ii[i];
    /* skip columns left of the diagonal block */
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    /* count columns inside the diagonal block */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: use one zeroed buffer large enough for the widest row */
    ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
2976 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
   (block compressed row).

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, baij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
@*/
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the implementation registered by the matrix type, if any */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3009 
3010 EXTERN_C_BEGIN
3011 #undef __FUNCT__
3012 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3013 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3014 {
3015   Mat_MPIBAIJ    *b;
3016   PetscErrorCode ierr;
3017   PetscInt       i, newbs = PetscAbs(bs);
3018 
3019   PetscFunctionBegin;
3020   if (bs < 0) {
3021     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3022       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3023     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3024     bs   = PetscAbs(bs);
3025   }
3026   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3027   bs = newbs;
3028 
3029 
3030   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3031   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3032   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3033   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3034   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3035 
3036   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3037   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3038   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3039   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3040 
3041   if (d_nnz) {
3042     for (i=0; i<B->rmap->n/bs; i++) {
3043       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3044     }
3045   }
3046   if (o_nnz) {
3047     for (i=0; i<B->rmap->n/bs; i++) {
3048       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3049     }
3050   }
3051 
3052   b = (Mat_MPIBAIJ*)B->data;
3053   b->bs2 = bs*bs;
3054   b->mbs = B->rmap->n/bs;
3055   b->nbs = B->cmap->n/bs;
3056   b->Mbs = B->rmap->N/bs;
3057   b->Nbs = B->cmap->N/bs;
3058 
3059   for (i=0; i<=b->size; i++) {
3060     b->rangebs[i] = B->rmap->range[i]/bs;
3061   }
3062   b->rstartbs = B->rmap->rstart/bs;
3063   b->rendbs   = B->rmap->rend/bs;
3064   b->cstartbs = B->cmap->rstart/bs;
3065   b->cendbs   = B->cmap->rend/bs;
3066 
3067   if (!B->preallocated) {
3068     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3069     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3070     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3071     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3072     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3073     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3074     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3075     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3076     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3077   }
3078 
3079   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3080   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3081   B->preallocated = PETSC_TRUE;
3082   ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3083   PetscFunctionReturn(0);
3084 }
3085 EXTERN_C_END
3086 
3087 EXTERN_C_BEGIN
3088 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3089 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3090 EXTERN_C_END
3091 
3092 
3093 EXTERN_C_BEGIN
3094 #undef __FUNCT__
3095 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
/*
   MatConvert_MPIBAIJ_MPIAdj - Builds an MPIAdj (adjacency/graph) matrix from the
   block structure of an MPIBAIJ matrix.

   One graph vertex per block row/column; diagonal blocks are omitted (MPIAdj
   stores a graph, which by convention has no self edges).  The ii/jj CSR arrays
   allocated here are handed off to MatCreateMPIAdj(), which takes ownership.

   Collective on Mat.  B is not modified; *adj is created.
*/
PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  /* d = local diagonal block (local column numbering), o = off-diagonal block (compressed column numbering) */
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  /* M = number of local block rows; rstart = first global block row owned by this process */
  PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  /* id/jd: CSR of diagonal part; io/jo: CSR of off-diagonal part; garray maps compressed to global block columns */
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* First pass: count entries per block row (diagonal + off-diagonal, minus any diagonal block) */
  ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
  ii[0] = 0;
  CHKMEMQ;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count if this block row contains a diagonal block (self edge) */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  CHKMEMQ;
  }
  /* Second pass: fill jj in ascending global block-column order by merging three runs:
     off-diagonal columns left of the owned range, diagonal-part columns (skipping the
     diagonal block), then the remaining off-diagonal columns.  Relies on each CSR row
     of both parts being sorted, and on garray values lying outside [cstart,cend). */
  ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
  CHKMEMQ;
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
  CHKMEMQ;
      }
    }
    for (;j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
  CHKMEMQ;
    }
  }
  /* MatCreateMPIAdj takes ownership of ii and jj; do not free them here */
  ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3140 EXTERN_C_END
3141 
3142 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3143 EXTERN_C_BEGIN
3144 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3145 EXTERN_C_END
3146 
3147 EXTERN_C_BEGIN
3148 #undef __FUNCT__
3149 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
/*
   MatConvert_MPIBAIJ_MPIAIJ - Converts a parallel block AIJ matrix to the scalar
   (point-wise) MPIAIJ format.

   Strategy: create an empty MPIAIJ matrix of the same sizes, then replace its
   diagonal and off-diagonal sequential parts with SeqAIJ conversions of the
   corresponding SeqBAIJ parts of A.

   NOTE(review): DisAssemble_MPIBAIJ modifies the *input* matrix A (rebuilding its
   off-diagonal part with global column numbering), and A is reassembled below even
   in the MAT_INITIAL_MATRIX case — so this routine has a side effect on A; confirm
   callers do not depend on A's internal (compressed) off-diagonal representation.

   Collective on Mat.
*/
PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");

  /* Create the target MPIAIJ shell with matching local/global sizes.
     Zero preallocation: its A and B parts are about to be replaced wholesale. */
  ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  b = (Mat_MPIAIJ*) B->data;

  /* Discard the placeholder sequential parts and substitute converted copies of A's parts */
  ierr = MatDestroy(&b->A);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
  ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
  /* Reassemble both B (to finalize the new parts) and A (undoing the disassembly above) */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_REUSE_MATRIX) {
    /* In-place conversion: B's guts replace A's, A's header is preserved */
    ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
3183 EXTERN_C_END
3184 
3185 EXTERN_C_BEGIN
3186 #if defined(PETSC_HAVE_MUMPS)
3187 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3188 #endif
3189 EXTERN_C_END
3190 
3191 /*MC
3192    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3193 
3194    Options Database Keys:
3195 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3196 . -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table (with expansion factor fact) to save memory during matrix assembly
3198 
3199   Level: beginner
3200 
3201 .seealso: MatCreateMPIBAIJ
3202 M*/
3203 
3204 EXTERN_C_BEGIN
3205 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
3206 EXTERN_C_END
3207 
3208 EXTERN_C_BEGIN
3209 #undef __FUNCT__
3210 #define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   MatCreate_MPIBAIJ - Constructor registered for the MATMPIBAIJ type.

   Allocates the Mat_MPIBAIJ implementation struct, installs the function table,
   initializes all cached state to empty, processes the -mat_use_hash_table
   option, and composes the type-specific methods (conversions, preallocation,
   store/retrieve values, etc.) on the object.

   Preallocation and the creation of the local A (diagonal) and B (off-diagonal)
   sequential matrices happen later, in MatMPIBAIJSetPreallocation_MPIBAIJ.
*/
PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships (filled in at preallocation time) */
  ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed (values destined for other processes) */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly       = PETSC_FALSE;

  /* NOTE(review): the options title says "loading" but this runs at matrix creation */
  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) {
      PetscReal fact = 1.39;  /* default hash table expansion factor */
      ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      /* the same option key optionally carries a numeric factor argument */
      ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
      if (fact <= 1.0) fact = 1.39;  /* reject factors that would not enlarge the table */
      ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
      ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
    }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  /* Compose type-specific methods; the string keys are how PetscTryMethod/
     MatConvert locate them at runtime, so they must not be changed */
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
                                     "MatConvert_MPIBAIJ_MPIAdj",
                                      MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
                                     "MatConvert_MPIBAIJ_MPIAIJ",
                                      MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
                                     "MatConvert_MPIBAIJ_MPISBAIJ",
                                      MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                     "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
                                     "MatConvert_MPIBAIJ_MPIBSTRM",
                                      MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3313 EXTERN_C_END
3314 
3315 /*MC
3316    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3317 
3318    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3319    and MATMPIBAIJ otherwise.
3320 
3321    Options Database Keys:
3322 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3323 
3324   Level: beginner
3325 
3326 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3327 M*/
3328 
3329 #undef __FUNCT__
3330 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3331 /*@C
3332    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3333    (block compressed row).  For good matrix assembly performance
3334    the user should preallocate the matrix storage by setting the parameters
3335    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3336    performance can be increased by more than a factor of 50.
3337 
3338    Collective on Mat
3339 
3340    Input Parameters:
3341 +  A - the matrix
.  bs   - size of block
3343 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3344            submatrix  (same for all local rows)
3345 .  d_nnz - array containing the number of block nonzeros in the various block rows
3346            of the in diagonal portion of the local (possibly different for each block
3347            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3348 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3349            submatrix (same for all local rows).
3350 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3351            off-diagonal portion of the local submatrix (possibly different for
3352            each block row) or PETSC_NULL.
3353 
3354    If the *_nnz parameter is given then the *_nz parameter is ignored
3355 
3356    Options Database Keys:
3357 +   -mat_block_size - size of the blocks to use
3358 -   -mat_use_hash_table <fact>
3359 
3360    Notes:
3361    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3363 
3364    Storage Information:
3365    For a square global matrix we define each processor's diagonal portion
3366    to be its local rows and the corresponding columns (a square submatrix);
3367    each processor's off-diagonal portion encompasses the remainder of the
3368    local matrix (a rectangular submatrix).
3369 
3370    The user can specify preallocated storage for the diagonal part of
3371    the local submatrix with either d_nz or d_nnz (not both).  Set
3372    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3373    memory allocation.  Likewise, specify preallocated storage for the
3374    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3375 
3376    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3377    the figure below we depict these three local rows and all columns (0-11).
3378 
3379 .vb
3380            0 1 2 3 4 5 6 7 8 9 10 11
3381           -------------------
3382    row 3  |  o o o d d d o o o o o o
3383    row 4  |  o o o d d d o o o o o o
3384    row 5  |  o o o d d d o o o o o o
3385           -------------------
3386 .ve
3387 
3388    Thus, any entries in the d locations are stored in the d (diagonal)
3389    submatrix, and any entries in the o locations are stored in the
3390    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3391    stored simply in the MATSEQBAIJ format for compressed row storage.
3392 
3393    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3394    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3395    In general, for PDE problems in which most nonzeros are near the diagonal,
3396    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3397    or you will get TERRIBLE performance; see the users' manual chapter on
3398    matrices.
3399 
3400    You can call MatGetInfo() to get information on how effective the preallocation was;
3401    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3402    You can also run with the option -info and look for messages with the string
3403    malloc in them to see if additional memory allocation was needed.
3404 
3405    Level: intermediate
3406 
3407 .keywords: matrix, block, aij, compressed row, sparse, parallel
3408 
3409 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3410 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Validate the matrix object and require bs to be the same value on every process */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* Dispatch to the type-specific implementation composed on the object
     ("Try": silently a no-op for matrix types that do not provide it) */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3422 
3423 #undef __FUNCT__
3424 #define __FUNCT__ "MatCreateMPIBAIJ"
3425 /*@C
3426    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3427    (block compressed row).  For good matrix assembly performance
3428    the user should preallocate the matrix storage by setting the parameters
3429    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3430    performance can be increased by more than a factor of 50.
3431 
3432    Collective on MPI_Comm
3433 
3434    Input Parameters:
3435 +  comm - MPI communicator
.  bs   - size of block
3437 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3438            This value should be the same as the local size used in creating the
3439            y vector for the matrix-vector product y = Ax.
3440 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3441            This value should be the same as the local size used in creating the
3442            x vector for the matrix-vector product y = Ax.
3443 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3444 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3445 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3446            submatrix  (same for all local rows)
3447 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3448            of the in diagonal portion of the local (possibly different for each block
3449            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3450 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3451            submatrix (same for all local rows).
3452 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3453            off-diagonal portion of the local submatrix (possibly different for
3454            each block row) or PETSC_NULL.
3455 
3456    Output Parameter:
3457 .  A - the matrix
3458 
3459    Options Database Keys:
3460 +   -mat_block_size - size of the blocks to use
3461 -   -mat_use_hash_table <fact>
3462 
3463    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3465    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3466 
3467    Notes:
3468    If the *_nnz parameter is given then the *_nz parameter is ignored
3469 
   A nonzero block is any block that has 1 or more nonzeros in it
3471 
3472    The user MUST specify either the local or global matrix dimensions
3473    (possibly both).
3474 
3475    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3477 
3478    Storage Information:
3479    For a square global matrix we define each processor's diagonal portion
3480    to be its local rows and the corresponding columns (a square submatrix);
3481    each processor's off-diagonal portion encompasses the remainder of the
3482    local matrix (a rectangular submatrix).
3483 
3484    The user can specify preallocated storage for the diagonal part of
3485    the local submatrix with either d_nz or d_nnz (not both).  Set
3486    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3487    memory allocation.  Likewise, specify preallocated storage for the
3488    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3489 
3490    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3491    the figure below we depict these three local rows and all columns (0-11).
3492 
3493 .vb
3494            0 1 2 3 4 5 6 7 8 9 10 11
3495           -------------------
3496    row 3  |  o o o d d d o o o o o o
3497    row 4  |  o o o d d d o o o o o o
3498    row 5  |  o o o d d d o o o o o o
3499           -------------------
3500 .ve
3501 
3502    Thus, any entries in the d locations are stored in the d (diagonal)
3503    submatrix, and any entries in the o locations are stored in the
3504    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3505    stored simply in the MATSEQBAIJ format for compressed row storage.
3506 
3507    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3508    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3509    In general, for PDE problems in which most nonzeros are near the diagonal,
3510    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3511    or you will get TERRIBLE performance; see the users' manual chapter on
3512    matrices.
3513 
3514    Level: intermediate
3515 
3516 .keywords: matrix, block, aij, compressed row, sparse, parallel
3517 
3518 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3519 @*/
3520 PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3521 {
3522   PetscErrorCode ierr;
3523   PetscMPIInt    size;
3524 
3525   PetscFunctionBegin;
3526   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3527   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3528   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3529   if (size > 1) {
3530     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3531     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3532   } else {
3533     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3534     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3535   }
3536   PetscFunctionReturn(0);
3537 }
3538 
3539 #undef __FUNCT__
3540 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Duplicates a parallel block AIJ matrix.

   Copies sizes, block geometry, ownership ranges, the column map, the garray
   (compressed-to-global column translation), the local-to-ghost scatter, and
   finally the two sequential parts A (diagonal) and B (off-diagonal), with
   values copied or not according to cpvalues.

   Per-call working state (MatGetRow buffers, hash table contents, assembly
   barray) is deliberately reset to empty rather than copied.
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-size bookkeeping (counts in units of blocks) */
  a      = (Mat_MPIBAIJ*)mat->data;
  /* NOTE(review): PetscLayoutReference below replaces mat->rmap with a reference to
     matin->rmap, so this bs assignment appears redundant — confirm */
  mat->rmap->bs  = matin->rmap->bs;
  a->bs2   = oldmat->bs2;
  a->mbs   = oldmat->mbs;
  a->nbs   = oldmat->nbs;
  a->Mbs   = oldmat->Mbs;
  a->Nbs   = oldmat->Nbs;

  /* share the row/column layouts with the original rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch buffers are per-matrix; start empty */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag/factor), discard the contents */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* block-row ownership ranges across all processes */
  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  /* colmap (global block column -> off-diagonal slot) exists only after assembly */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
  ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* garray: global block-column indices of the off-diagonal part; length = number
     of nonzero block columns in B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  /* block stash, ghost vector and scatter context for MatMult */
  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  /* duplicate the sequential diagonal and off-diagonal parts (values per cpvalues) */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}
3625 
3626 #undef __FUNCT__
3627 #define __FUNCT__ "MatLoad_MPIBAIJ"
3628 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3629 {
3630   PetscErrorCode ierr;
3631   int            fd;
3632   PetscInt       i,nz,j,rstart,rend;
3633   PetscScalar    *vals,*buf;
3634   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3635   MPI_Status     status;
3636   PetscMPIInt    rank,size,maxnz;
3637   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3638   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3639   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3640   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3641   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3642   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3643 
3644   PetscFunctionBegin;
3645   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3646     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3647   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3648 
3649   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3650   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3651   if (!rank) {
3652     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3653     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3654     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3655   }
3656 
3657   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3658 
3659   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3660   M = header[1]; N = header[2];
3661 
3662   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3663   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3664   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3665 
3666   /* If global sizes are set, check if they are consistent with that given in the file */
3667   if (sizesset) {
3668     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3669   }
3670   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3671   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3672 
3673   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3674 
3675   /*
3676      This code adds extra rows to make sure the number of rows is
3677      divisible by the blocksize
3678   */
3679   Mbs        = M/bs;
3680   extra_rows = bs - M + bs*Mbs;
3681   if (extra_rows == bs) extra_rows = 0;
3682   else                  Mbs++;
3683   if (extra_rows && !rank) {
3684     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3685   }
3686 
3687   /* determine ownership of all rows */
3688   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3689     mbs        = Mbs/size + ((Mbs % size) > rank);
3690     m          = mbs*bs;
3691   } else { /* User set */
3692     m          = newmat->rmap->n;
3693     mbs        = m/bs;
3694   }
3695   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3696   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3697 
3698   /* process 0 needs enough room for process with most rows */
3699   if (!rank) {
3700     mmax = rowners[1];
3701     for (i=2; i<size; i++) {
3702       mmax = PetscMax(mmax,rowners[i]);
3703     }
3704     mmax*=bs;
3705   } else mmax = m;
3706 
3707   rowners[0] = 0;
3708   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3709   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3710   rstart = rowners[rank];
3711   rend   = rowners[rank+1];
3712 
3713   /* distribute row lengths to all processors */
3714   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3715   if (!rank) {
3716     mend = m;
3717     if (size == 1) mend = mend - extra_rows;
3718     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3719     for (j=mend; j<m; j++) locrowlens[j] = 1;
3720     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3721     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3722     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3723     for (j=0; j<m; j++) {
3724       procsnz[0] += locrowlens[j];
3725     }
3726     for (i=1; i<size; i++) {
3727       mend = browners[i+1] - browners[i];
3728       if (i == size-1) mend = mend - extra_rows;
3729       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3730       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3731       /* calculate the number of nonzeros on each processor */
3732       for (j=0; j<browners[i+1]-browners[i]; j++) {
3733         procsnz[i] += rowlengths[j];
3734       }
3735       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3736     }
3737     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3738   } else {
3739     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3740   }
3741 
3742   if (!rank) {
3743     /* determine max buffer needed and allocate it */
3744     maxnz = procsnz[0];
3745     for (i=1; i<size; i++) {
3746       maxnz = PetscMax(maxnz,procsnz[i]);
3747     }
3748     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3749 
3750     /* read in my part of the matrix column indices  */
3751     nz     = procsnz[0];
3752     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3753     mycols = ibuf;
3754     if (size == 1)  nz -= extra_rows;
3755     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3756     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3757 
3758     /* read in every ones (except the last) and ship off */
3759     for (i=1; i<size-1; i++) {
3760       nz   = procsnz[i];
3761       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3762       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3763     }
3764     /* read in the stuff for the last proc */
3765     if (size != 1) {
3766       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3767       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3768       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3769       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3770     }
3771     ierr = PetscFree(cols);CHKERRQ(ierr);
3772   } else {
3773     /* determine buffer space needed for message */
3774     nz = 0;
3775     for (i=0; i<m; i++) {
3776       nz += locrowlens[i];
3777     }
3778     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3779     mycols = ibuf;
3780     /* receive message of column indices*/
3781     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3782     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3783     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3784   }
3785 
3786   /* loop over local rows, determining number of off diagonal entries */
3787   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3788   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3789   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3790   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3791   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3792   rowcount = 0; nzcount = 0;
3793   for (i=0; i<mbs; i++) {
3794     dcount  = 0;
3795     odcount = 0;
3796     for (j=0; j<bs; j++) {
3797       kmax = locrowlens[rowcount];
3798       for (k=0; k<kmax; k++) {
3799         tmp = mycols[nzcount++]/bs;
3800         if (!mask[tmp]) {
3801           mask[tmp] = 1;
3802           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3803           else masked1[dcount++] = tmp;
3804         }
3805       }
3806       rowcount++;
3807     }
3808 
3809     dlens[i]  = dcount;
3810     odlens[i] = odcount;
3811 
3812     /* zero out the mask elements we set */
3813     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3814     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3815   }
3816 
3817 
3818   if (!sizesset) {
3819     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3820   }
3821   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3822 
3823   if (!rank) {
3824     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3825     /* read in my part of the matrix numerical values  */
3826     nz = procsnz[0];
3827     vals = buf;
3828     mycols = ibuf;
3829     if (size == 1)  nz -= extra_rows;
3830     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3831     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3832 
3833     /* insert into matrix */
3834     jj      = rstart*bs;
3835     for (i=0; i<m; i++) {
3836       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3837       mycols += locrowlens[i];
3838       vals   += locrowlens[i];
3839       jj++;
3840     }
3841     /* read in other processors (except the last one) and ship out */
3842     for (i=1; i<size-1; i++) {
3843       nz   = procsnz[i];
3844       vals = buf;
3845       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3846       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3847     }
3848     /* the last proc */
3849     if (size != 1){
3850       nz   = procsnz[i] - extra_rows;
3851       vals = buf;
3852       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3853       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3854       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3855     }
3856     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3857   } else {
3858     /* receive numeric values */
3859     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3860 
3861     /* receive message of values*/
3862     vals   = buf;
3863     mycols = ibuf;
3864     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3865     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3866     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3867 
3868     /* insert into matrix */
3869     jj      = rstart*bs;
3870     for (i=0; i<m; i++) {
3871       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3872       mycols += locrowlens[i];
3873       vals   += locrowlens[i];
3874       jj++;
3875     }
3876   }
3877   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3878   ierr = PetscFree(buf);CHKERRQ(ierr);
3879   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3880   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3881   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3882   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3883   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3884   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3885 
3886   PetscFunctionReturn(0);
3887 }
3888 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Not Collective, each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

  Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation if one is registered;
     silently does nothing for matrix types without this method. */
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3917 
3918 EXTERN_C_BEGIN
3919 #undef __FUNCT__
3920 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3921 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3922 {
3923   Mat_MPIBAIJ *baij;
3924 
3925   PetscFunctionBegin;
3926   baij = (Mat_MPIBAIJ*)mat->data;
3927   baij->ht_fact = fact;
3928   PetscFunctionReturn(0);
3929 }
3930 EXTERN_C_END
3931 
3932 #undef __FUNCT__
3933 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3934 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3935 {
3936   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3937   PetscFunctionBegin;
3938   *Ad     = a->A;
3939   *Ao     = a->B;
3940   *colmap = a->garray;
3941   PetscFunctionReturn(0);
3942 }
3943 
3944 /*
3945     Special version for direct calls from Fortran (to eliminate two function call overheads
3946 */
3947 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3948 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3949 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3950 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3951 #endif
3952 
3953 #undef __FUNCT__
3954 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3955 /*@C
3956   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3957 
3958   Collective on Mat
3959 
3960   Input Parameters:
3961 + mat - the matrix
3962 . min - number of input rows
3963 . im - input rows
3964 . nin - number of input columns
3965 . in - input columns
3966 . v - numerical values input
3967 - addvin - INSERT_VALUES or ADD_VALUES
3968 
3969   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3970 
3971   Level: advanced
3972 
3973 .seealso:   MatSetValuesBlocked()
3974 @*/
3975 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3976 {
3977   /* convert input arguments to C version */
3978   Mat             mat = *matin;
3979   PetscInt        m = *min, n = *nin;
3980   InsertMode      addv = *addvin;
3981 
3982   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3983   const MatScalar *value;
3984   MatScalar       *barray=baij->barray;
3985   PetscBool       roworiented = baij->roworiented;
3986   PetscErrorCode  ierr;
3987   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3988   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3989   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3990 
3991   PetscFunctionBegin;
3992   /* tasks normally handled by MatSetValuesBlocked() */
3993   if (mat->insertmode == NOT_SET_VALUES) {
3994     mat->insertmode = addv;
3995   }
3996 #if defined(PETSC_USE_DEBUG)
3997   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3998   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3999 #endif
4000   if (mat->assembled) {
4001     mat->was_assembled = PETSC_TRUE;
4002     mat->assembled     = PETSC_FALSE;
4003   }
4004   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4005 
4006 
4007   if(!barray) {
4008     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
4009     baij->barray = barray;
4010   }
4011 
4012   if (roworiented) {
4013     stepval = (n-1)*bs;
4014   } else {
4015     stepval = (m-1)*bs;
4016   }
4017   for (i=0; i<m; i++) {
4018     if (im[i] < 0) continue;
4019 #if defined(PETSC_USE_DEBUG)
4020     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4021 #endif
4022     if (im[i] >= rstart && im[i] < rend) {
4023       row = im[i] - rstart;
4024       for (j=0; j<n; j++) {
4025         /* If NumCol = 1 then a copy is not required */
4026         if ((roworiented) && (n == 1)) {
4027           barray = (MatScalar*)v + i*bs2;
4028         } else if((!roworiented) && (m == 1)) {
4029           barray = (MatScalar*)v + j*bs2;
4030         } else { /* Here a copy is required */
4031           if (roworiented) {
4032             value = v + i*(stepval+bs)*bs + j*bs;
4033           } else {
4034             value = v + j*(stepval+bs)*bs + i*bs;
4035           }
4036           for (ii=0; ii<bs; ii++,value+=stepval) {
4037             for (jj=0; jj<bs; jj++) {
4038               *barray++  = *value++;
4039             }
4040           }
4041           barray -=bs2;
4042         }
4043 
4044         if (in[j] >= cstart && in[j] < cend){
4045           col  = in[j] - cstart;
4046           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4047         }
4048         else if (in[j] < 0) continue;
4049 #if defined(PETSC_USE_DEBUG)
4050         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4051 #endif
4052         else {
4053           if (mat->was_assembled) {
4054             if (!baij->colmap) {
4055               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4056             }
4057 
4058 #if defined(PETSC_USE_DEBUG)
4059 #if defined (PETSC_USE_CTABLE)
4060             { PetscInt data;
4061               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4062               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4063             }
4064 #else
4065             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4066 #endif
4067 #endif
4068 #if defined (PETSC_USE_CTABLE)
4069 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4070             col  = (col - 1)/bs;
4071 #else
4072             col = (baij->colmap[in[j]] - 1)/bs;
4073 #endif
4074             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4075               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4076               col =  in[j];
4077             }
4078           }
4079           else col = in[j];
4080           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4081         }
4082       }
4083     } else {
4084       if (!baij->donotstash) {
4085         if (roworiented) {
4086           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4087         } else {
4088           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4089         }
4090       }
4091     }
4092   }
4093 
4094   /* task normally handled by MatSetValuesBlocked() */
4095   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4096   PetscFunctionReturn(0);
4097 }
4098 
4099 #undef __FUNCT__
4100 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4101 /*@
4102      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
4103          CSR format the local rows.
4104 
4105    Collective on MPI_Comm
4106 
4107    Input Parameters:
4108 +  comm - MPI communicator
4109 .  bs - the block size, only a block size of 1 is supported
4110 .  m - number of local rows (Cannot be PETSC_DECIDE)
4111 .  n - This value should be the same as the local size used in creating the
4112        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4113        calculated if N is given) For square matrices n is almost always m.
4114 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4115 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4116 .   i - row indices
4117 .   j - column indices
4118 -   a - matrix values
4119 
4120    Output Parameter:
4121 .   mat - the matrix
4122 
4123    Level: intermediate
4124 
4125    Notes:
4126        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4127      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4128      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4129 
4130        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4131 
4132 .keywords: matrix, aij, compressed row, sparse, parallel
4133 
4134 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4135           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4136 @*/
4137 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4138 {
4139   PetscErrorCode ierr;
4140 
4141 
4142  PetscFunctionBegin;
4143   if (i[0]) {
4144     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4145   }
4146   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4147   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4148   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4149   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
4150   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4151   PetscFunctionReturn(0);
4152 }
4153