xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 8ddb5d8b44839bc6385c771bd2d6d5c8cd353003)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
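/*
   MatGetRowMaxAbs_MPIBAIJ - computes, for each locally owned row, the entry of largest
   absolute value.  The maximum is taken first over the diagonal block A (indices shifted
   to global numbering), then compared against the off-diagonal block B; off-diagonal
   column indices are translated back to global point columns through garray (the block
   column map) and the block size.
*/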
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++){
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non-scalable way since the
81    length of colmap is proportional to the global number of (block) columns.
82 */
83 #undef __FUNCT__
84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++){
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
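/*
   The following two macros insert a single scalar value into the diagonal (A) or the
   off-diagonal (B) sequential BAIJ part of the local matrix.  They locate the block
   column with a short bisection followed by a linear scan of the block row; if the block
   is not present and new nonzeros are allowed (nonew == 0), the row is reallocated when
   necessary and later blocks are shifted up to make room for a freshly zeroed block.
   They are inlined, one-value-at-a-time versions of the MatSetValues_SeqBAIJ() calls
   shown (commented out) in MatSetValues_MPIBAIJ() below.
*/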
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
184 
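/*
   MatSetValues_MPIBAIJ - inserts or adds values addressed by global point (not block)
   indices.  For rows owned by this process, entries whose columns fall in the diagonal
   ownership range go into baij->A; all other entries go into baij->B after translating
   the global column to a local one through colmap (created lazily once the matrix has
   been assembled).  If a column is not found in colmap and new nonzeros are allowed,
   the matrix is disassembled so the new off-process coupling can be added.  Rows owned
   by other processes are placed in the stash and communicated at assembly time (an
   error if MAT_NO_OFF_PROC_ENTRIES was set, skipped if the stash is disabled).
*/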
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some Variables required in the macro */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig){
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         } else if (in[j] < 0) continue;
229 #if defined(PETSC_USE_DEBUG)
230         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
231 #endif
232         else {
233           if (mat->was_assembled) {
234             if (!baij->colmap) {
235               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
236             }
237 #if defined (PETSC_USE_CTABLE)
238             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
239             col  = col - 1;
240 #else
241             col = baij->colmap[in[j]/bs] - 1;
242 #endif
243             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
244               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
245               col =  in[j];
246               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
247               B = baij->B;
248               b = (Mat_SeqBAIJ*)(B)->data;
249               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
250               ba=b->a;
251             } else col += in[j]%bs;
252           } else col = in[j];
253           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
254           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
260       if (!baij->donotstash) {
261         if (roworiented) {
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
263         } else {
264           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
265         }
266       }
267     }
268   }
269   PetscFunctionReturn(0);
270 }
271 
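/*
   MatSetValuesBlocked_MPIBAIJ - inserts or adds dense bs x bs blocks addressed by global
   block row/column indices.  Each block is gathered (when a copy is needed) into the
   contiguous work array baij->barray and handed to the sequential BAIJ routines for
   either the diagonal part A or the off-diagonal part B; block rows owned by other
   processes go into the block stash and are communicated at assembly time.

   Minimal calling sketch from user code (illustrative only; mat, ierr and the values
   shown are assumptions, not taken from this file).  The caller passes one 2x2 block
   (bs = 2) at global block row 0, block column 3:

       PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
       PetscInt    brow = 0,bcol = 3;
       ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/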
272 #undef __FUNCT__
273 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
274 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
275 {
276   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
277   const PetscScalar *value;
278   MatScalar         *barray=baij->barray;
279   PetscBool         roworiented = baij->roworiented;
280   PetscErrorCode    ierr;
281   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
282   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
283   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
284 
285   PetscFunctionBegin;
286   if (!barray) {
287     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
288     baij->barray = barray;
289   }
290 
291   if (roworiented) {
292     stepval = (n-1)*bs;
293   } else {
294     stepval = (m-1)*bs;
295   }
296   for (i=0; i<m; i++) {
297     if (im[i] < 0) continue;
298 #if defined(PETSC_USE_DEBUG)
299     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
300 #endif
301     if (im[i] >= rstart && im[i] < rend) {
302       row = im[i] - rstart;
303       for (j=0; j<n; j++) {
304         /* If only one block is passed (n == 1 row-oriented, or m == 1 column-oriented), the block in v is already contiguous and a copy is not required */
305         if ((roworiented) && (n == 1)) {
306           barray = (MatScalar*)v + i*bs2;
307         } else if ((!roworiented) && (m == 1)) {
308           barray = (MatScalar*)v + j*bs2;
309         } else { /* Here a copy is required */
310           if (roworiented) {
311             value = v + (i*(stepval+bs) + j)*bs;
312           } else {
313             value = v + (j*(stepval+bs) + i)*bs;
314           }
315           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
316             for (jj=0; jj<bs; jj++) {
317               barray[jj]  = value[jj];
318             }
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend){
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         }
328         else if (in[j] < 0) continue;
329 #if defined(PETSC_USE_DEBUG)
330         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331 #endif
332         else {
333           if (mat->was_assembled) {
334             if (!baij->colmap) {
335               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
336             }
337 
338 #if defined(PETSC_USE_DEBUG)
339 #if defined (PETSC_USE_CTABLE)
340             { PetscInt data;
341               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
342               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343             }
344 #else
345             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346 #endif
347 #endif
348 #if defined (PETSC_USE_CTABLE)
349 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
350             col  = (col - 1)/bs;
351 #else
352             col = (baij->colmap[in[j]] - 1)/bs;
353 #endif
354             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
355               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
356               col =  in[j];
357             }
358           }
359           else col = in[j];
360           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
361         }
362       }
363     } else {
364       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
365       if (!baij->donotstash) {
366         if (roworiented) {
367           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         } else {
369           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         }
371       }
372     }
373   }
374   PetscFunctionReturn(0);
375 }
376 
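/*
   Multiplicative hashing used by the hash-table based MatSetValues variants below:
   the key (a 1-based global block index, block_row*Nbs + block_col + 1) is multiplied
   by the golden-ratio constant HASH_KEY and the fractional part of the product, scaled
   by the table size, gives the starting slot.  Collisions are resolved by linear
   probing, both when the table is built in MatCreateHashTable_MPIBAIJ_Private() and
   when it is searched during insertion.
*/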
377 #define HASH_KEY 0.6180339887
378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 #undef __FUNCT__
382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
384 {
385   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
386   PetscBool      roworiented = baij->roworiented;
387   PetscErrorCode ierr;
388   PetscInt       i,j,row,col;
389   PetscInt       rstart_orig=mat->rmap->rstart;
390   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
391   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
392   PetscReal      tmp;
393   MatScalar      **HD = baij->hd,value;
394 #if defined(PETSC_USE_DEBUG)
395   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
396 #endif
397 
398   PetscFunctionBegin;
399   if (v) PetscValidScalarPointer(v,6);
400   for (i=0; i<m; i++) {
401 #if defined(PETSC_USE_DEBUG)
402     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
403     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
404 #endif
405     row = im[i];
406     if (row >= rstart_orig && row < rend_orig) {
407       for (j=0; j<n; j++) {
408         col = in[j];
409         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410         /* Look up into the Hash Table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
423             if (idx == h1) {
424               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
425             }
426           }
427         }
428 #else
429         if (HT[idx] != key) {
430           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
431           if (idx == size) {
432             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
433             if (idx == h1) {
434               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
435             }
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453 #if defined(PETSC_USE_DEBUG)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463 {
464   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465   PetscBool         roworiented = baij->roworiented;
466   PetscErrorCode    ierr;
467   PetscInt          i,j,ii,jj,row,col;
468   PetscInt          rstart=baij->rstartbs;
469   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471   PetscReal         tmp;
472   MatScalar         **HD = baij->hd,*baij_a;
473   const PetscScalar *v_t,*value;
474 #if defined(PETSC_USE_DEBUG)
475   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476 #endif
477 
478   PetscFunctionBegin;
479 
480   if (roworiented) {
481     stepval = (n-1)*bs;
482   } else {
483     stepval = (m-1)*bs;
484   }
485   for (i=0; i<m; i++) {
486 #if defined(PETSC_USE_DEBUG)
487     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
488     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
489 #endif
490     row   = im[i];
491     v_t   = v + i*nbs2;
492     if (row >= rstart && row < rend) {
493       for (j=0; j<n; j++) {
494         col = in[j];
495 
496         /* Look up into the Hash Table */
497         key = row*Nbs+col+1;
498         h1  = HASH(size,key,tmp);
499 
500         idx = h1;
501 #if defined(PETSC_USE_DEBUG)
502         total_ct++;
503         insert_ct++;
504         if (HT[idx] != key) {
505           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
506           if (idx == size) {
507             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
508             if (idx == h1) {
509               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
510             }
511           }
512         }
513 #else
514         if (HT[idx] != key) {
515           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
516           if (idx == size) {
517             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
518             if (idx == h1) {
519               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
520             }
521           }
522         }
523 #endif
524         baij_a = HD[idx];
525         if (roworiented) {
526           /*value = v + i*(stepval+bs)*bs + j*bs;*/
527           /* value = v + (i*(stepval+bs)+j)*bs; */
528           value = v_t;
529           v_t  += bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval) {
532               for (jj=ii; jj<bs2; jj+=bs) {
533                 baij_a[jj]  += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval) {
538               for (jj=ii; jj<bs2; jj+=bs) {
539                 baij_a[jj]  = *value++;
540               }
541             }
542           }
543         } else {
544           value = v + j*(stepval+bs)*bs + i*bs;
545           if (addv == ADD_VALUES) {
546             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
547               for (jj=0; jj<bs; jj++) {
548                 baij_a[jj]  += *value++;
549               }
550             }
551           } else {
552             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
553               for (jj=0; jj<bs; jj++) {
554                 baij_a[jj]  = *value++;
555               }
556             }
557           }
558         }
559       }
560     } else {
561       if (!baij->donotstash) {
562         if (roworiented) {
563           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
564         } else {
565           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         }
567       }
568     }
569   }
570 #if defined(PETSC_USE_DEBUG)
571   baij->ht_total_ct = total_ct;
572   baij->ht_insert_ct = insert_ct;
573 #endif
574   PetscFunctionReturn(0);
575 }
576 
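/*
   MatGetValues_MPIBAIJ - returns values addressed by global point indices; only rows
   owned by this process may be requested.  Columns in the diagonal ownership range are
   read from baij->A; other columns are translated through colmap and read from baij->B,
   with 0.0 returned for off-diagonal locations that are not stored.
*/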
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetValues_MPIBAIJ"
579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580 {
581   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
582   PetscErrorCode ierr;
583   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
584   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
585 
586   PetscFunctionBegin;
587   for (i=0; i<m; i++) {
588     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
591       row = idxm[i] - bsrstart;
592       for (j=0; j<n; j++) {
593         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595         if (idxn[j] >= bscstart && idxn[j] < bscend){
596           col = idxn[j] - bscstart;
597           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598         } else {
599           if (!baij->colmap) {
600             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
601           }
602 #if defined (PETSC_USE_CTABLE)
603           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
604           data --;
605 #else
606           data = baij->colmap[idxn[j]/bs]-1;
607 #endif
608           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
609           else {
610             col  = data + idxn[j]%bs;
611             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
612           }
613         }
614       }
615     } else {
616       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
617     }
618   }
619   PetscFunctionReturn(0);
620 }
621 
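/*
   MatNorm_MPIBAIJ - on a single process the work is delegated to the sequential norm.
   In parallel:
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2); local sums of squares over A and B are
                     combined with MPI_Allreduce(MPIU_SUM) and the square root taken.
     NORM_1:         max_j sum_i |a_ij|; per-column absolute sums (arrays of length
                     equal to the global number of columns, hence not scalable) are
                     summed across processes and the maximum taken.
     NORM_INFINITY:  max_i sum_j |a_ij|; per-row sums are accumulated block row by
                     block row and reduced with MPI_Allreduce(MPIU_MAX).
*/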
622 #undef __FUNCT__
623 #define __FUNCT__ "MatNorm_MPIBAIJ"
624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
625 {
626   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
627   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
628   PetscErrorCode ierr;
629   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
630   PetscReal      sum = 0.0;
631   MatScalar      *v;
632 
633   PetscFunctionBegin;
634   if (baij->size == 1) {
635     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
636   } else {
637     if (type == NORM_FROBENIUS) {
638       v = amat->a;
639       nz = amat->nz*bs2;
640       for (i=0; i<nz; i++) {
641 #if defined(PETSC_USE_COMPLEX)
642         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
643 #else
644         sum += (*v)*(*v); v++;
645 #endif
646       }
647       v = bmat->a;
648       nz = bmat->nz*bs2;
649       for (i=0; i<nz; i++) {
650 #if defined(PETSC_USE_COMPLEX)
651         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
652 #else
653         sum += (*v)*(*v); v++;
654 #endif
655       }
656       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
657       *nrm = PetscSqrtReal(*nrm);
658     } else if (type == NORM_1) { /* max column sum */
659       PetscReal *tmp,*tmp2;
660       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
661       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
662       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
663       v = amat->a; jj = amat->j;
664       for (i=0; i<amat->nz; i++) {
665         for (j=0; j<bs; j++){
666           col = bs*(cstart + *jj) + j; /* column index */
667           for (row=0; row<bs; row++){
668             tmp[col] += PetscAbsScalar(*v);  v++;
669           }
670         }
671         jj++;
672       }
673       v = bmat->a; jj = bmat->j;
674       for (i=0; i<bmat->nz; i++) {
675         for (j=0; j<bs; j++){
676           col = bs*garray[*jj] + j;
677           for (row=0; row<bs; row++){
678             tmp[col] += PetscAbsScalar(*v); v++;
679           }
680         }
681         jj++;
682       }
683       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
684       *nrm = 0.0;
685       for (j=0; j<mat->cmap->N; j++) {
686         if (tmp2[j] > *nrm) *nrm = tmp2[j];
687       }
688       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
689     } else if (type == NORM_INFINITY) { /* max row sum */
690       PetscReal *sums;
691       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
692       sum = 0.0;
693       for (j=0; j<amat->mbs; j++) {
694         for (row=0; row<bs; row++) sums[row] = 0.0;
695         v = amat->a + bs2*amat->i[j];
696         nz = amat->i[j+1]-amat->i[j];
697         for (i=0; i<nz; i++) {
698           for (col=0; col<bs; col++){
699             for (row=0; row<bs; row++){
700               sums[row] += PetscAbsScalar(*v); v++;
701             }
702           }
703         }
704         v = bmat->a + bs2*bmat->i[j];
705         nz = bmat->i[j+1]-bmat->i[j];
706         for (i=0; i<nz; i++) {
707           for (col=0; col<bs; col++){
708             for (row=0; row<bs; row++){
709               sums[row] += PetscAbsScalar(*v); v++;
710             }
711           }
712         }
713         for (row=0; row<bs; row++){
714           if (sums[row] > sum) sum = sums[row];
715         }
716       }
717       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
718       ierr = PetscFree(sums);CHKERRQ(ierr);
719     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 /*
725   Creates and fills in the hash table.
726   This table is created only once.
727   If new entries need to be added to the matrix,
728   then the hash table has to be destroyed and
729   recreated.
730 */
731 #undef __FUNCT__
732 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
733 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
734 {
735   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
736   Mat            A = baij->A,B=baij->B;
737   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
738   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
739   PetscErrorCode ierr;
740   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
741   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
742   PetscInt       *HT,key;
743   MatScalar      **HD;
744   PetscReal      tmp;
745 #if defined(PETSC_USE_INFO)
746   PetscInt       ct=0,max=0;
747 #endif
748 
749   PetscFunctionBegin;
750   if (baij->ht) PetscFunctionReturn(0);
751 
752   baij->ht_size = (PetscInt)(factor*nz);
753   ht_size       = baij->ht_size;
754 
755   /* Allocate Memory for Hash Table */
756   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
757   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
758   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
759   HD   = baij->hd;
760   HT   = baij->ht;
761 
762   /* Loop Over A */
763   for (i=0; i<a->mbs; i++) {
764     for (j=ai[i]; j<ai[i+1]; j++) {
765       row = i+rstart;
766       col = aj[j]+cstart;
767 
768       key = row*Nbs + col + 1;
769       h1  = HASH(ht_size,key,tmp);
770       for (k=0; k<ht_size; k++){
771         if (!HT[(h1+k)%ht_size]) {
772           HT[(h1+k)%ht_size] = key;
773           HD[(h1+k)%ht_size] = a->a + j*bs2;
774           break;
775 #if defined(PETSC_USE_INFO)
776         } else {
777           ct++;
778 #endif
779         }
780       }
781 #if defined(PETSC_USE_INFO)
782       if (k> max) max = k;
783 #endif
784     }
785   }
786   /* Loop Over B */
787   for (i=0; i<b->mbs; i++) {
788     for (j=bi[i]; j<bi[i+1]; j++) {
789       row = i+rstart;
790       col = garray[bj[j]];
791       key = row*Nbs + col + 1;
792       h1  = HASH(ht_size,key,tmp);
793       for (k=0; k<ht_size; k++){
794         if (!HT[(h1+k)%ht_size]) {
795           HT[(h1+k)%ht_size] = key;
796           HD[(h1+k)%ht_size] = b->a + j*bs2;
797           break;
798 #if defined(PETSC_USE_INFO)
799         } else {
800           ct++;
801 #endif
802         }
803       }
804 #if defined(PETSC_USE_INFO)
805       if (k> max) max = k;
806 #endif
807     }
808   }
809 
810   /* Print Summary */
811 #if defined(PETSC_USE_INFO)
812   for (i=0,j=0; i<ht_size; i++) {
813     if (HT[i]) {j++;}
814   }
815   ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
816 #endif
817   PetscFunctionReturn(0);
818 }
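/*
   The hash-table path is an optimization of MatSetValues()/MatSetValuesBlocked() for
   matrices whose nonzero pattern does not change after the first assembly; it is
   switched on in MatAssemblyEnd_MPIBAIJ() when baij->ht_flag is set, and the table
   size is factor*nz (see above).  A sketch of how user code typically enables it
   (illustrative only; the option and function names are assumptions based on the
   MatSetHashTableFactor_C hook this class registers, not defined in this file):

       ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatMPIBAIJSetHashTableFactor(mat,1.99);CHKERRQ(ierr);  // table size = 1.99*nz
*/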
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
823 {
824   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
825   PetscErrorCode ierr;
826   PetscInt       nstash,reallocs;
827   InsertMode     addv;
828 
829   PetscFunctionBegin;
830   if (baij->donotstash || mat->nooffprocentries) {
831     PetscFunctionReturn(0);
832   }
833 
834   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
835   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
836   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
837   mat->insertmode = addv; /* in case this processor had no cache */
838 
839   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
840   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
841   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
842   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
843   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
844   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
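/*
   MatAssemblyEnd_MPIBAIJ - completes the assembly: drains the scalar and block stashes
   (temporarily switching to column-oriented insertion because block values are stashed
   column-oriented), assembles the diagonal part A, detects whether any process
   disassembled (MPI_Allreduce with MPI_PROD on was_assembled, skipped when the nonzero
   structure of B cannot change) and disassembles locally if so, builds the scatter for
   matrix-vector products on the first final assembly, assembles B, and finally creates
   the hash table and switches the insertion routines if the hash-table path was requested.
*/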
848 #undef __FUNCT__
849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
851 {
852   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
853   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
854   PetscErrorCode ierr;
855   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
856   PetscInt       *row,*col;
857   PetscBool      r1,r2,r3,other_disassembled;
858   MatScalar      *val;
859   InsertMode     addv = mat->insertmode;
860   PetscMPIInt    n;
861 
862   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
863   PetscFunctionBegin;
864   if (!baij->donotstash && !mat->nooffprocentries) {
865     while (1) {
866       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
867       if (!flg) break;
868 
869       for (i=0; i<n;) {
870         /* Now identify the consecutive vals belonging to the same row */
871         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
872         if (j < n) ncols = j-i;
873         else       ncols = n-i;
874         /* Now assemble all these values with a single function call */
875         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
876         i = j;
877       }
878     }
879     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
880     /* Now process the block-stash. Since the values are stashed column-oriented,
881        temporarily clear the roworiented flags, and restore the original flags
882        after MatSetValues() */
883     r1 = baij->roworiented;
884     r2 = a->roworiented;
885     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
886     baij->roworiented = PETSC_FALSE;
887     a->roworiented    = PETSC_FALSE;
888     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
889     while (1) {
890       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
891       if (!flg) break;
892 
893       for (i=0; i<n;) {
894         /* Now identify the consecutive vals belonging to the same row */
895         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
896         if (j < n) ncols = j-i;
897         else       ncols = n-i;
898         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
899         i = j;
900       }
901     }
902     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
903     baij->roworiented = r1;
904     a->roworiented    = r2;
905     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
906   }
907 
908   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
909   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
910 
911   /* determine if any processor has disassembled; if so, we must
912      also disassemble ourselves, in order that we may reassemble. */
913   /*
914      if nonzero structure of submatrix B cannot change then we know that
915      no processor disassembled thus we can skip this stuff
916   */
917   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
918     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
919     if (mat->was_assembled && !other_disassembled) {
920       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
921     }
922   }
923 
924   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
925     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
926   }
927   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
928   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
929   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
930 
931 #if defined(PETSC_USE_INFO)
932   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
933     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
934     baij->ht_total_ct  = 0;
935     baij->ht_insert_ct = 0;
936   }
937 #endif
938   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
939     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
940     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
941     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
942   }
943 
944   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
945   baij->rowvalues = 0;
946   PetscFunctionReturn(0);
947 }
948 
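/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - for the info formats only summary data is
   printed; otherwise the entire parallel matrix is copied onto process 0 (clearly not
   scalable, intended for debugging and small problems) and viewed there through the
   sequential viewer obtained with PetscViewerGetSingleton().
*/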
949 #undef __FUNCT__
950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
952 {
953   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
954   PetscErrorCode    ierr;
955   PetscMPIInt       size = baij->size,rank = baij->rank;
956   PetscInt          bs = mat->rmap->bs;
957   PetscBool         iascii,isdraw;
958   PetscViewer       sviewer;
959   PetscViewerFormat format;
960 
961   PetscFunctionBegin;
962   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
963   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
964   if (iascii) {
965     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
966     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
967       MatInfo info;
968       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
969       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
970       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
971       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
972              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
973       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
974       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
979       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
980       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
981       PetscFunctionReturn(0);
982     } else if (format == PETSC_VIEWER_ASCII_INFO) {
983       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
984       PetscFunctionReturn(0);
985     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
986       PetscFunctionReturn(0);
987     }
988   }
989 
990   if (isdraw) {
991     PetscDraw       draw;
992     PetscBool  isnull;
993     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
994     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
995   }
996 
997   if (size == 1) {
998     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
999     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1000   } else {
1001     /* assemble the entire matrix onto first processor. */
1002     Mat         A;
1003     Mat_SeqBAIJ *Aloc;
1004     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1005     MatScalar   *a;
1006 
1007     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1008     /* Perhaps this should be the type of mat? */
1009     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1010     if (!rank) {
1011       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1012     } else {
1013       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1014     }
1015     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1016     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1017     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1018     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1019 
1020     /* copy over the A part */
1021     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1024 
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = (baij->cstartbs+aj[j])*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     /* copy over the B part */
1037     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1038     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1039     for (i=0; i<mbs; i++) {
1040       rvals[0] = bs*(baij->rstartbs + i);
1041       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1042       for (j=ai[i]; j<ai[i+1]; j++) {
1043         col = baij->garray[aj[j]]*bs;
1044         for (k=0; k<bs; k++) {
1045           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1046           col++; a += bs;
1047         }
1048       }
1049     }
1050     ierr = PetscFree(rvals);CHKERRQ(ierr);
1051     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     /*
1054        Everyone has to call to draw the matrix since the graphics waits are
1055        synchronized across all processors that share the PetscDraw object
1056     */
1057     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1058     if (!rank) {
1059       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1060       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
1061       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1062       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1063     }
1064     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1065     ierr = MatDestroy(&A);CHKERRQ(ierr);
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
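/*
   MatView_MPIBAIJ_Binary - writes the matrix in the standard PETSc binary format:
   a 4-entry header (class id, M, N, total number of nonzeros), then the row lengths,
   the column indices, and finally the numerical values, all expanded from blocks to
   point rows/columns.  Process 0 does all the writing; the other processes send their
   pieces one at a time under the PetscViewerFlowControl protocol to bound the memory
   needed on process 0.
*/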
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1072 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1073 {
1074   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1075   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1076   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1077   PetscErrorCode ierr;
1078   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1079   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1080   int            fd;
1081   PetscScalar    *column_values;
1082   FILE           *file;
1083   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1084   PetscInt       message_count,flowcontrolcount;
1085 
1086   PetscFunctionBegin;
1087   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1088   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1089   nz   = bs2*(A->nz + B->nz);
1090   rlen = mat->rmap->n;
1091   if (!rank) {
1092     header[0] = MAT_FILE_CLASSID;
1093     header[1] = mat->rmap->N;
1094     header[2] = mat->cmap->N;
1095     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1096     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1097     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1098     /* get largest number of rows any processor has */
1099     range = mat->rmap->range;
1100     for (i=1; i<size; i++) {
1101       rlen = PetscMax(rlen,range[i+1] - range[i]);
1102     }
1103   } else {
1104     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1105   }
1106 
1107   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1108   /* compute lengths of each row  */
1109   for (i=0; i<a->mbs; i++) {
1110     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1111   }
1112   /* store the row lengths to the file */
1113   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1114   if (!rank) {
1115     MPI_Status status;
1116     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1117     rlen  = (range[1] - range[0])/bs;
1118     for (i=0; i<rlen; i++) {
1119       for (j=0; j<bs; j++) {
1120         row_lens[i*bs+j] = bs*crow_lens[i];
1121       }
1122     }
1123     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1124     for (i=1; i<size; i++) {
1125       rlen = (range[i+1] - range[i])/bs;
1126       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1127       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1128       for (k=0; k<rlen; k++) {
1129 	for (j=0; j<bs; j++) {
1130 	  row_lens[k*bs+j] = bs*crow_lens[k];
1131 	}
1132       }
1133       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1134     }
1135     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1136     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1137   } else {
1138     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1139     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1140     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1141   }
1142   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1143 
1144   /* load up the local column indices. Include them for all rows, not just one set per block row, since process 0 does not have the
1145      information needed to expand one set per block row into one per row. This does require more communication, but still not more than
1146      the communication needed for the nonzero values */
1147   nzmax = nz; /* space the largest processor needs */
1148   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1149   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1150   cnt  = 0;
1151   for (i=0; i<a->mbs; i++) {
1152     pcnt = cnt;
1153     for (j=B->i[i]; j<B->i[i+1]; j++) {
1154       if ( (col = garray[B->j[j]]) > cstart) break;
1155       for (l=0; l<bs; l++) {
1156 	column_indices[cnt++] = bs*col+l;
1157       }
1158     }
1159     for (k=A->i[i]; k<A->i[i+1]; k++) {
1160       for (l=0; l<bs; l++) {
1161         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1162       }
1163     }
1164     for (; j<B->i[i+1]; j++) {
1165       for (l=0; l<bs; l++) {
1166         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1167       }
1168     }
1169     len = cnt - pcnt;
1170     for (k=1; k<bs; k++) {
1171       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1172       cnt += len;
1173     }
1174   }
1175   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1176 
1177   /* store the columns to the file */
1178   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1179   if (!rank) {
1180     MPI_Status status;
1181     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1182     for (i=1; i<size; i++) {
1183       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1184       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1185       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1186       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1187     }
1188     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1189   } else {
1190     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1191     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1192     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1193     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1194   }
1195   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1196 
1197   /* load up the numerical values */
1198   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1199   cnt = 0;
1200   for (i=0; i<a->mbs; i++) {
1201     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1202     for (j=B->i[i]; j<B->i[i+1]; j++) {
1203       if ( garray[B->j[j]] > cstart) break;
1204       for (l=0; l<bs; l++) {
1205         for (ll=0; ll<bs; ll++) {
1206 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1207         }
1208       }
1209       cnt += bs;
1210     }
1211     for (k=A->i[i]; k<A->i[i+1]; k++) {
1212       for (l=0; l<bs; l++) {
1213         for (ll=0; ll<bs; ll++) {
1214           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1215         }
1216       }
1217       cnt += bs;
1218     }
1219     for (; j<B->i[i+1]; j++) {
1220       for (l=0; l<bs; l++) {
1221         for (ll=0; ll<bs; ll++) {
1222 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1223         }
1224       }
1225       cnt += bs;
1226     }
1227     cnt += (bs-1)*rlen;
1228   }
1229   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1230 
1231   /* store the column values to the file */
1232   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1233   if (!rank) {
1234     MPI_Status status;
1235     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1236     for (i=1; i<size; i++) {
1237       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1238       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1239       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1240       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1241     }
1242     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1243   } else {
1244     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1245     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1246     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(column_values);CHKERRQ(ierr);
1250 
1251   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1252   if (file) {
1253     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatView_MPIBAIJ"
1260 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1261 {
1262   PetscErrorCode ierr;
1263   PetscBool      iascii,isdraw,issocket,isbinary;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1267   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1268   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1269   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1270   if (iascii || isdraw || issocket) {
1271     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1272   } else if (isbinary) {
1273     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1274   } else {
1275     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1282 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1283 {
1284   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288 #if defined(PETSC_USE_LOG)
1289   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1290 #endif
1291   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1292   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1293   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1294   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1295 #if defined (PETSC_USE_CTABLE)
1296   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1297 #else
1298   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1299 #endif
1300   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1301   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1302   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1303   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1304   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1305   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1306   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1307   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1308 
1309   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1318   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatMult_MPIBAIJ"
1324 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1325 {
1326   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1327   PetscErrorCode ierr;
1328   PetscInt       nt;
1329 
1330   PetscFunctionBegin;
1331   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1332   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1333   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1334   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1335   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1337   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1338   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
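
/*
   Illustrative note on the routine above (object names here are hypothetical):
   the parallel product is the usual two-block split

       y = A_diag * x_local  +  B_offdiag * x_ghost,

   where the scatter that fills a->lvec with the ghost entries of x is overlapped
   with the local diagonal-block product.  A caller simply uses

       ierr = MatMult(A,x,y);CHKERRQ(ierr);    with x laid out like A->cmap and y like A->rmap.
*/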
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1344 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1345 {
1346   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1347   PetscErrorCode ierr;
1348 
1349   PetscFunctionBegin;
1350   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1351   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1352   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1353   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1359 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1360 {
1361   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1362   PetscErrorCode ierr;
1363   PetscBool      merged;
1364 
1365   PetscFunctionBegin;
1366   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1367   /* do nondiagonal part */
1368   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1369   if (!merged) {
1370     /* send it on its way */
1371     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1372     /* do local part */
1373     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1374     /* receive remote parts: note this assumes the values are not actually */
1375     /* inserted in yy until the next line */
1376     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1377   } else {
1378     /* do local part */
1379     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1380     /* send it on its way */
1381     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1382     /* values actually were received in the Begin() but we need to call this nop */
1383     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1384   }
1385   PetscFunctionReturn(0);
1386 }
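
/*
   Illustrative note on the routine above: the transpose product is

       y = A_diag^T * x  +  (B_offdiag^T * x scattered back with ADD_VALUES).

   When VecScatterGetMerged() reports a merged scatter, the reverse scatter already
   deposits values into yy inside VecScatterBegin(); since the local product
   overwrites yy, it must be computed before the Begin() in that case, which is why
   the two branches order the calls differently.
*/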
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1390 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1391 {
1392   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1393   PetscErrorCode ierr;
1394 
1395   PetscFunctionBegin;
1396   /* do nondiagonal part */
1397   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1398   /* send it on its way */
1399   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1400   /* do local part */
1401   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1402   /* receive remote parts: note this assumes the values are not actually */
1403   /* inserted in zz until the next line, which is true for this implementation */
1404   /* but is perhaps not always true. */
1405   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 /*
1410   This only works correctly for square matrices where the subblock A->A is the
1411    diagonal block
1412 */
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1415 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1416 {
1417   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1422   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatScale_MPIBAIJ"
1428 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1429 {
1430   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1431   PetscErrorCode ierr;
1432 
1433   PetscFunctionBegin;
1434   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1435   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1441 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1442 {
1443   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1444   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1445   PetscErrorCode ierr;
1446   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1447   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1448   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1449 
1450   PetscFunctionBegin;
1451   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1452   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1453   mat->getrowactive = PETSC_TRUE;
1454 
1455   if (!mat->rowvalues && (idx || v)) {
1456     /*
1457         allocate enough space to hold information from the longest row.
1458     */
1459     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1460     PetscInt     max = 1,mbs = mat->mbs,tmp;
1461     for (i=0; i<mbs; i++) {
1462       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1463       if (max < tmp) { max = tmp; }
1464     }
1465     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1466   }
1467   lrow = row - brstart;
1468 
1469   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1470   if (!v)   {pvA = 0; pvB = 0;}
1471   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1472   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1473   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1474   nztot = nzA + nzB;
1475 
1476   cmap  = mat->garray;
1477   if (v  || idx) {
1478     if (nztot) {
1479       /* Sort by increasing column numbers, assuming A and B already sorted */
1480       PetscInt imark = -1;
1481       if (v) {
1482         *v = v_p = mat->rowvalues;
1483         for (i=0; i<nzB; i++) {
1484           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1485           else break;
1486         }
1487         imark = i;
1488         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1489         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1490       }
1491       if (idx) {
1492         *idx = idx_p = mat->rowindices;
1493         if (imark > -1) {
1494           for (i=0; i<imark; i++) {
1495             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1496           }
1497         } else {
1498           for (i=0; i<nzB; i++) {
1499             if (cmap[cworkB[i]/bs] < cstart)
1500               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1501             else break;
1502           }
1503           imark = i;
1504         }
1505         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1506         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1507       }
1508     } else {
1509       if (idx) *idx = 0;
1510       if (v)   *v   = 0;
1511     }
1512   }
1513   *nz = nztot;
1514   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1515   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1516   PetscFunctionReturn(0);
1517 }
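
/*
   Hypothetical usage sketch for the routine above (A, rstart, etc. are stand-in
   names): MatGetRow() on an MPIBAIJ matrix may only be called for locally owned
   rows, and it returns global, sorted column indices merged from the diagonal and
   off-diagonal parts.

       PetscInt          rstart,rend,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       ierr = MatGetRow(A,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[0..ncols-1] and vals[0..ncols-1] ...
       ierr = MatRestoreRow(A,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
*/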
1518 
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1521 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1522 {
1523   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1524 
1525   PetscFunctionBegin;
1526   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1527   baij->getrowactive = PETSC_FALSE;
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1533 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1534 {
1535   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1536   PetscErrorCode ierr;
1537 
1538   PetscFunctionBegin;
1539   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1540   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1546 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1547 {
1548   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1549   Mat            A = a->A,B = a->B;
1550   PetscErrorCode ierr;
1551   PetscReal      isend[5],irecv[5];
1552 
1553   PetscFunctionBegin;
1554   info->block_size     = (PetscReal)matin->rmap->bs;
1555   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1556   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1557   isend[3] = info->memory;  isend[4] = info->mallocs;
1558   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1559   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1560   isend[3] += info->memory;  isend[4] += info->mallocs;
1561   if (flag == MAT_LOCAL) {
1562     info->nz_used      = isend[0];
1563     info->nz_allocated = isend[1];
1564     info->nz_unneeded  = isend[2];
1565     info->memory       = isend[3];
1566     info->mallocs      = isend[4];
1567   } else if (flag == MAT_GLOBAL_MAX) {
1568     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1569     info->nz_used      = irecv[0];
1570     info->nz_allocated = irecv[1];
1571     info->nz_unneeded  = irecv[2];
1572     info->memory       = irecv[3];
1573     info->mallocs      = irecv[4];
1574   } else if (flag == MAT_GLOBAL_SUM) {
1575     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1576     info->nz_used      = irecv[0];
1577     info->nz_allocated = irecv[1];
1578     info->nz_unneeded  = irecv[2];
1579     info->memory       = irecv[3];
1580     info->mallocs      = irecv[4];
1581   } else {
1582     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1583   }
1584   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1585   info->fill_ratio_needed = 0;
1586   info->factor_mallocs    = 0;
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1592 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1593 {
1594   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1595   PetscErrorCode ierr;
1596 
1597   PetscFunctionBegin;
1598   switch (op) {
1599   case MAT_NEW_NONZERO_LOCATIONS:
1600   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1601   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1602   case MAT_KEEP_NONZERO_PATTERN:
1603   case MAT_NEW_NONZERO_LOCATION_ERR:
1604     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1605     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1606     break;
1607   case MAT_ROW_ORIENTED:
1608     a->roworiented = flg;
1609     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1610     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1611     break;
1612   case MAT_NEW_DIAGONALS:
1613     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1614     break;
1615   case MAT_IGNORE_OFF_PROC_ENTRIES:
1616     a->donotstash = flg;
1617     break;
1618   case MAT_USE_HASH_TABLE:
1619     a->ht_flag = flg;
1620     break;
1621   case MAT_SYMMETRIC:
1622   case MAT_STRUCTURALLY_SYMMETRIC:
1623   case MAT_HERMITIAN:
1624   case MAT_SYMMETRY_ETERNAL:
1625     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1626     break;
1627   default:
1628     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1629   }
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1635 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1636 {
1637   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1638   Mat_SeqBAIJ    *Aloc;
1639   Mat            B;
1640   PetscErrorCode ierr;
1641   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1642   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1643   MatScalar      *a;
1644 
1645   PetscFunctionBegin;
1646   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1647   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1648     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1649     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1650     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1651     /* Do not know preallocation information, but must set block size */
1652     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1653   } else {
1654     B = *matout;
1655   }
1656 
1657   /* copy over the A part */
1658   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1659   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1660   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1661 
1662   for (i=0; i<mbs; i++) {
1663     rvals[0] = bs*(baij->rstartbs + i);
1664     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1665     for (j=ai[i]; j<ai[i+1]; j++) {
1666       col = (baij->cstartbs+aj[j])*bs;
1667       for (k=0; k<bs; k++) {
1668         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1669         col++; a += bs;
1670       }
1671     }
1672   }
1673   /* copy over the B part */
1674   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1675   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1676   for (i=0; i<mbs; i++) {
1677     rvals[0] = bs*(baij->rstartbs + i);
1678     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1679     for (j=ai[i]; j<ai[i+1]; j++) {
1680       col = baij->garray[aj[j]]*bs;
1681       for (k=0; k<bs; k++) {
1682         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1683         col++; a += bs;
1684       }
1685     }
1686   }
1687   ierr = PetscFree(rvals);CHKERRQ(ierr);
1688   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1689   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690 
1691   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1692     *matout = B;
1693   } else {
1694     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1695   }
1696   PetscFunctionReturn(0);
1697 }
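
/*
   Illustrative note on the routine above: the transpose is built by reinsertion.
   Each bs x bs block of the local diagonal and off-diagonal parts at block
   position (I,J) is written into the new matrix at block position (J,I), bs values
   at a time through MatSetValues_MPIBAIJ(), and the concluding
   MatAssemblyBegin/End() pair routes any entries that now belong to other
   processes.  A caller (names hypothetical) would use

       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
*/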
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1701 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1702 {
1703   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1704   Mat            a = baij->A,b = baij->B;
1705   PetscErrorCode ierr;
1706   PetscInt       s1,s2,s3;
1707 
1708   PetscFunctionBegin;
1709   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1710   if (rr) {
1711     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1712     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1713     /* Overlap communication with computation. */
1714     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715   }
1716   if (ll) {
1717     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1718     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1719     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1720   }
1721   /* scale the diagonal block */
1722   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1723 
1724   if (rr) {
1725     /* Do a scatter end and then right scale the off-diagonal block */
1726     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1727     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1728   }
1729 
1730   PetscFunctionReturn(0);
1731 }
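
/*
   Illustrative note on the routine above: MatDiagonalScale() computes
   A(i,j) <- ll[i]*A(i,j)*rr[j].  Left scaling of both blocks needs only the local
   entries of ll, while right scaling of the off-diagonal block needs the ghost
   entries of rr; that is why the forward scatter of rr into baij->lvec is started
   first and only completed after the diagonal block has been scaled.
*/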
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1735 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1736 {
1737   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1738   PetscErrorCode    ierr;
1739   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1740   PetscInt          i,*owners = A->rmap->range;
1741   PetscInt          *nprocs,j,idx,nsends,row;
1742   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1743   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1744   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1745   MPI_Comm          comm = ((PetscObject)A)->comm;
1746   MPI_Request       *send_waits,*recv_waits;
1747   MPI_Status        recv_status,*send_status;
1748   const PetscScalar *xx;
1749   PetscScalar       *bb;
1750 #if defined(PETSC_DEBUG)
1751   PetscBool         found = PETSC_FALSE;
1752 #endif
1753 
1754   PetscFunctionBegin;
1755   /*  first count number of contributors to each processor */
1756   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1757   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1758   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1759   j = 0;
1760   for (i=0; i<N; i++) {
1761     if (lastidx > (idx = rows[i])) j = 0;
1762     lastidx = idx;
1763     for (; j<size; j++) {
1764       if (idx >= owners[j] && idx < owners[j+1]) {
1765         nprocs[2*j]++;
1766         nprocs[2*j+1] = 1;
1767         owner[i] = j;
1768 #if defined(PETSC_DEBUG)
1769         found = PETSC_TRUE;
1770 #endif
1771         break;
1772       }
1773     }
1774 #if defined(PETSC_DEBUG)
1775     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1776     found = PETSC_FALSE;
1777 #endif
1778   }
1779   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1780 
1781   if (A->nooffproczerorows) {
1782     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1783     nrecvs = nsends;
1784     nmax   = N;
1785   } else {
1786     /* inform other processors of number of messages and max length*/
1787     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1788   }
1789 
1790   /* post receives:   */
1791   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1792   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1793   for (i=0; i<nrecvs; i++) {
1794     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1795   }
1796 
1797   /* do sends:
1798      1) starts[i] gives the starting index in svalues for stuff going to
1799      the ith processor
1800   */
1801   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1802   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1803   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1804   starts[0]  = 0;
1805   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1806   for (i=0; i<N; i++) {
1807     svalues[starts[owner[i]]++] = rows[i];
1808   }
1809 
1810   starts[0] = 0;
1811   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1812   count = 0;
1813   for (i=0; i<size; i++) {
1814     if (nprocs[2*i+1]) {
1815       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1816     }
1817   }
1818   ierr = PetscFree(starts);CHKERRQ(ierr);
1819 
1820   base = owners[rank];
1821 
1822   /*  wait on receives */
1823   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1824   count  = nrecvs;
1825   slen = 0;
1826   while (count) {
1827     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1828     /* unpack receives into our local space */
1829     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1830     source[imdex]  = recv_status.MPI_SOURCE;
1831     lens[imdex]    = n;
1832     slen          += n;
1833     count--;
1834   }
1835   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1836 
1837   /* move the data into the send scatter */
1838   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1839   count = 0;
1840   for (i=0; i<nrecvs; i++) {
1841     values = rvalues + i*nmax;
1842     for (j=0; j<lens[i]; j++) {
1843       lrows[count++] = values[j] - base;
1844     }
1845   }
1846   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1847   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1848   ierr = PetscFree(owner);CHKERRQ(ierr);
1849   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1850 
1851   /* fix right hand side if needed */
1852   if (x && b) {
1853     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1854     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1855     for (i=0; i<slen; i++) {
1856       bb[lrows[i]] = diag*xx[lrows[i]];
1857     }
1858     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1859     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1860   }
1861 
1862   /* actually zap the local rows */
1863   /*
1864         Zero the required rows. If the "diagonal block" of the matrix
1865      is square and the user wishes to set the diagonal we use separate
1866      code so that MatSetValues() is not called for each diagonal allocating
1867      new memory, thus calling lots of mallocs and slowing things down.
1868 
1869   */
1870   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1871   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1872   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1873     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1874   } else if (diag != 0.0) {
1875     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1876     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1877        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1878     for (i=0; i<slen; i++) {
1879       row  = lrows[i] + rstart_bs;
1880       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1881     }
1882     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1883     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1884   } else {
1885     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1886   }
1887 
1888   ierr = PetscFree(lrows);CHKERRQ(ierr);
1889 
1890   /* wait on sends */
1891   if (nsends) {
1892     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1893     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1894     ierr = PetscFree(send_status);CHKERRQ(ierr);
1895   }
1896   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1897   ierr = PetscFree(svalues);CHKERRQ(ierr);
1898 
1899   PetscFunctionReturn(0);
1900 }
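
/*
   Hypothetical usage sketch for the routine above (A, x, b are stand-in names):
   rows may be listed on any process and are routed to their owners by the
   send/receive rendezvous above.  When x and b are supplied, the right-hand side
   is adjusted so that each zeroed equation reads diag*x[row] = b[row]:

       PetscInt    rows[] = {0,5};          global row indices
       PetscScalar diag   = 1.0;
       ierr = MatZeroRows(A,2,rows,diag,x,b);CHKERRQ(ierr);
*/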
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1904 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1905 {
1906   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1907   PetscErrorCode ierr;
1908 
1909   PetscFunctionBegin;
1910   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatEqual_MPIBAIJ"
1918 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1919 {
1920   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1921   Mat            a,b,c,d;
1922   PetscBool      flg;
1923   PetscErrorCode ierr;
1924 
1925   PetscFunctionBegin;
1926   a = matA->A; b = matA->B;
1927   c = matB->A; d = matB->B;
1928 
1929   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1930   if (flg) {
1931     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1932   }
1933   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 #undef __FUNCT__
1938 #define __FUNCT__ "MatCopy_MPIBAIJ"
1939 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1940 {
1941   PetscErrorCode ierr;
1942   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1943   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1944 
1945   PetscFunctionBegin;
1946   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1947   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1948     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1949   } else {
1950     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1951     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1952   }
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1958 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1959 {
1960   PetscErrorCode ierr;
1961 
1962   PetscFunctionBegin;
1963   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1964   PetscFunctionReturn(0);
1965 }
1966 
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1969 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1970 {
1971   PetscErrorCode ierr;
1972   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1973   PetscBLASInt   bnz,one=1;
1974   Mat_SeqBAIJ    *x,*y;
1975 
1976   PetscFunctionBegin;
1977   if (str == SAME_NONZERO_PATTERN) {
1978     PetscScalar alpha = a;
1979     x = (Mat_SeqBAIJ *)xx->A->data;
1980     y = (Mat_SeqBAIJ *)yy->A->data;
1981     bnz = PetscBLASIntCast(x->nz*x->bs2);
1982     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1983     x = (Mat_SeqBAIJ *)xx->B->data;
1984     y = (Mat_SeqBAIJ *)yy->B->data;
1985     bnz = PetscBLASIntCast(x->nz*x->bs2);
1986     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1987   } else {
1988     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1989   }
1990   PetscFunctionReturn(0);
1991 }
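
/*
   Illustrative note on the routine above: with SAME_NONZERO_PATTERN the diagonal
   and off-diagonal parts of X and Y are assumed to have identical block structure,
   so Y <- a*X + Y reduces to two dense BLAS axpy calls over the stored block
   values (nz blocks of bs*bs scalars each); any other structure flag falls back to
   the general MatAXPY_Basic().  A caller (names hypothetical):

       ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/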
1992 
1993 #undef __FUNCT__
1994 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1995 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1996 {
1997   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1998   PetscInt rbs,cbs;
1999   PetscErrorCode ierr;
2000 
2001   PetscFunctionBegin;
2002   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2003   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2004   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2005   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2006   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2007   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2013 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2014 {
2015   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2020   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2026 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2027 {
2028   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2029   PetscErrorCode ierr;
2030 
2031   PetscFunctionBegin;
2032   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2033   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2039 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2040 {
2041   PetscErrorCode ierr;
2042   IS             iscol_local;
2043   PetscInt       csize;
2044 
2045   PetscFunctionBegin;
2046   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2047   if (call == MAT_REUSE_MATRIX) {
2048     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2049     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2050   } else {
2051     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2052   }
2053   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2054   if (call == MAT_INITIAL_MATRIX) {
2055     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2056     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2057   }
2058   PetscFunctionReturn(0);
2059 }
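
/*
   Hypothetical usage sketch for the routine above: the gathered column IS is
   composed on the new matrix under the key "ISAllGather", which is what lets a
   later MAT_REUSE_MATRIX call skip the allgather:

       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
       ... change the numerical values of A, keeping the same pattern ...
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/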
2060 
2061 #undef __FUNCT__
2062 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2063 /*
2064     Not great since it makes two copies of the submatrix: first a SeqBAIJ on each
2065   process, and then the end result obtained by concatenating the local matrices.
2066   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2067 */
2068 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2069 {
2070   PetscErrorCode ierr;
2071   PetscMPIInt    rank,size;
2072   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2073   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2074   Mat            *local,M,Mreuse;
2075   MatScalar      *vwork,*aa;
2076   MPI_Comm       comm = ((PetscObject)mat)->comm;
2077   Mat_SeqBAIJ    *aij;
2078 
2079 
2080   PetscFunctionBegin;
2081   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2082   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2083 
2084   if (call ==  MAT_REUSE_MATRIX) {
2085     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2086     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2087     local = &Mreuse;
2088     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2089   } else {
2090     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2091     Mreuse = *local;
2092     ierr   = PetscFree(local);CHKERRQ(ierr);
2093   }
2094 
2095   /*
2096       m - number of local rows
2097       n - number of columns (same on all processors)
2098       rstart - first row in new global matrix generated
2099   */
2100   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2101   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2102   m    = m/bs;
2103   n    = n/bs;
2104 
2105   if (call == MAT_INITIAL_MATRIX) {
2106     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2107     ii  = aij->i;
2108     jj  = aij->j;
2109 
2110     /*
2111         Determine the number of non-zeros in the diagonal and off-diagonal
2112         portions of the matrix in order to do correct preallocation
2113     */
2114 
2115     /* first get start and end of "diagonal" columns */
2116     if (csize == PETSC_DECIDE) {
2117       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2118       if (mglobal == n*bs) { /* square matrix */
2119         nlocal = m;
2120       } else {
2121         nlocal = n/size + ((n % size) > rank);
2122       }
2123     } else {
2124       nlocal = csize/bs;
2125     }
2126     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2127     rstart = rend - nlocal;
2128     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2129 
2130     /* next, compute all the lengths */
2131     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2132     olens = dlens + m;
2133     for (i=0; i<m; i++) {
2134       jend = ii[i+1] - ii[i];
2135       olen = 0;
2136       dlen = 0;
2137       for (j=0; j<jend; j++) {
2138         if (*jj < rstart || *jj >= rend) olen++;
2139         else dlen++;
2140         jj++;
2141       }
2142       olens[i] = olen;
2143       dlens[i] = dlen;
2144     }
2145     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2146     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2147     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2148     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2149     ierr = PetscFree(dlens);CHKERRQ(ierr);
2150   } else {
2151     PetscInt ml,nl;
2152 
2153     M = *newmat;
2154     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2155     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2156     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2157     /*
2158          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2159        rather than the slower MatSetValues().
2160     */
2161     M->was_assembled = PETSC_TRUE;
2162     M->assembled     = PETSC_FALSE;
2163   }
2164   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2165   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2166   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2167   ii  = aij->i;
2168   jj  = aij->j;
2169   aa  = aij->a;
2170   for (i=0; i<m; i++) {
2171     row   = rstart/bs + i;
2172     nz    = ii[i+1] - ii[i];
2173     cwork = jj;     jj += nz;
2174     vwork = aa;     aa += nz;
2175     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2176   }
2177 
2178   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2179   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2180   *newmat = M;
2181 
2182   /* save submatrix used in processor for next request */
2183   if (call ==  MAT_INITIAL_MATRIX) {
2184     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2185     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2186   }
2187 
2188   PetscFunctionReturn(0);
2189 }
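
/*
   Illustrative note on the routine above: the preallocation classifies each block
   column j of the locally extracted rows by the ownership range of the final
   matrix,

       rstart <= j < rend   ->  dlens[i]++   (diagonal block)
       otherwise            ->  olens[i]++   (off-diagonal block)

   so that MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens) reserves exactly the
   space needed before the block rows are reinserted with
   MatSetValuesBlocked_MPIBAIJ().
*/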
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatPermute_MPIBAIJ"
2193 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2194 {
2195   MPI_Comm       comm,pcomm;
2196   PetscInt       first,local_size,nrows;
2197   const PetscInt *rows;
2198   PetscMPIInt    size;
2199   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2200   PetscErrorCode ierr;
2201 
2202   PetscFunctionBegin;
2203   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2204   /* make a collective version of 'rowp' */
2205   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2206   if (pcomm==comm) {
2207     crowp = rowp;
2208   } else {
2209     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2210     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2211     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2212     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2213   }
2214   /* collect the global row permutation and invert it */
2215   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2216   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2217   if (pcomm!=comm) {
2218     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2219   }
2220   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2221   /* get the local target indices */
2222   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2223   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2224   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2225   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2226   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2227   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2228   /* the column permutation is so much easier;
2229      make a local version of 'colp' and invert it */
2230   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2231   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2232   if (size==1) {
2233     lcolp = colp;
2234   } else {
2235     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2236     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2237     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2238   }
2239   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2240   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2241   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2242   if (size>1) {
2243     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2244     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2245   }
2246   /* now we just get the submatrix */
2247   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2248   /* clean up */
2249   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2250   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2251   PetscFunctionReturn(0);
2252 }
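
/*
   Illustrative note on the routine above: the permutation is realized as a
   submatrix extraction.  The row permutation is made collective, gathered, and
   inverted; each process keeps the slice of the inverse corresponding to its own
   rows; the column permutation is inverted locally; and
   MatGetSubMatrix_MPIBAIJ_Private() then pulls out the rows and columns in that
   order.  A caller (names hypothetical):

       ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/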
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2256 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2257 {
2258   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2259   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2260 
2261   PetscFunctionBegin;
2262   if (nghosts) { *nghosts = B->nbs;}
2263   if (ghosts) {*ghosts = baij->garray;}
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2271 /*
2272     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2273 */
2274 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2275 {
2276   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2277   PetscErrorCode        ierr;
2278   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2279   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2280   const PetscInt        *is;
2281   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2282   PetscInt              *rowhit,M,cstart,cend,colb;
2283   PetscInt              *columnsforrow,l;
2284   IS                    *isa;
2285   PetscBool              done,flg;
2286   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2287   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2288 
2289   PetscFunctionBegin;
2290   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2291   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2292 
2293   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2294   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2295   M                = mat->rmap->n/bs;
2296   cstart           = mat->cmap->rstart/bs;
2297   cend             = mat->cmap->rend/bs;
2298   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2299   c->N             = mat->cmap->N/bs;
2300   c->m             = mat->rmap->n/bs;
2301   c->rstart        = mat->rmap->rstart/bs;
2302 
2303   c->ncolors       = nis;
2304   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2305   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2306   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2307   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2308   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2309   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2310 
2311   /* Allow access to data structures of local part of matrix */
2312   if (!baij->colmap) {
2313     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2314   }
2315   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2316   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2317 
2318   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2319   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2320 
2321   for (i=0; i<nis; i++) {
2322     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2323     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2324     c->ncolumns[i] = n;
2325     if (n) {
2326       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2327       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2328       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2329     } else {
2330       c->columns[i]  = 0;
2331     }
2332 
2333     if (ctype == IS_COLORING_GLOBAL){
2334       /* Determine the total (parallel) number of columns of this color */
2335       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2336       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2337 
2338       nn   = PetscMPIIntCast(n);
2339       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2340       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2341       if (!nctot) {
2342         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2343       }
2344 
2345       disp[0] = 0;
2346       for (j=1; j<size; j++) {
2347         disp[j] = disp[j-1] + ncolsonproc[j-1];
2348       }
2349 
2350       /* Get complete list of columns for color on each processor */
2351       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2352       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2353       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2354     } else if (ctype == IS_COLORING_GHOSTED){
2355       /* Determine local number of columns of this color on this process, including ghost points */
2356       nctot = n;
2357       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2358       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2359     } else {
2360       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2361     }
2362 
2363     /*
2364        Mark all rows affected by these columns
2365     */
2366     /* Temporary option to allow for debugging/testing */
2367     flg  = PETSC_FALSE;
2368     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2369     if (!flg) {/*-----------------------------------------------------------------------------*/
2370       /* crude, fast version */
2371       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2372       /* loop over columns*/
2373       for (j=0; j<nctot; j++) {
2374         if (ctype == IS_COLORING_GHOSTED) {
2375           col = ltog[cols[j]];
2376         } else {
2377           col  = cols[j];
2378         }
2379         if (col >= cstart && col < cend) {
2380           /* column is in diagonal block of matrix */
2381           rows = A_cj + A_ci[col-cstart];
2382           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2383         } else {
2384 #if defined (PETSC_USE_CTABLE)
2385           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2386           colb --;
2387 #else
2388           colb = baij->colmap[col] - 1;
2389 #endif
2390           if (colb == -1) {
2391             m = 0;
2392           } else {
2393             colb = colb/bs;
2394             rows = B_cj + B_ci[colb];
2395             m    = B_ci[colb+1] - B_ci[colb];
2396           }
2397         }
2398         /* loop over the rows of this column, marking them in rowhit */
2399         for (k=0; k<m; k++) {
2400           rowhit[*rows++] = col + 1;
2401         }
2402       }
2403 
2404       /* count the number of hits */
2405       nrows = 0;
2406       for (j=0; j<M; j++) {
2407         if (rowhit[j]) nrows++;
2408       }
2409       c->nrows[i]         = nrows;
2410       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2411       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2412       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2413       nrows = 0;
2414       for (j=0; j<M; j++) {
2415         if (rowhit[j]) {
2416           c->rows[i][nrows]           = j;
2417           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2418           nrows++;
2419         }
2420       }
2421     } else {/*-------------------------------------------------------------------------------*/
2422       /* slow version, using rowhit as a linked list */
2423       PetscInt currentcol,fm,mfm;
2424       rowhit[M] = M;
2425       nrows     = 0;
2426       /* loop over columns*/
2427       for (j=0; j<nctot; j++) {
2428         if (ctype == IS_COLORING_GHOSTED) {
2429           col = ltog[cols[j]];
2430         } else {
2431           col  = cols[j];
2432         }
2433         if (col >= cstart && col < cend) {
2434           /* column is in diagonal block of matrix */
2435           rows = A_cj + A_ci[col-cstart];
2436           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2437         } else {
2438 #if defined (PETSC_USE_CTABLE)
2439           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2440           colb --;
2441 #else
2442           colb = baij->colmap[col] - 1;
2443 #endif
2444           if (colb == -1) {
2445             m = 0;
2446           } else {
2447             colb = colb/bs;
2448             rows = B_cj + B_ci[colb];
2449             m    = B_ci[colb+1] - B_ci[colb];
2450           }
2451         }
2452 
2453         /* loop over the rows of this column, marking them in rowhit */
2454         fm    = M; /* fm points to first entry in linked list */
2455         for (k=0; k<m; k++) {
2456           currentcol = *rows++;
2457           /* is it already in the list? */
2458           do {
2459             mfm  = fm;
2460             fm   = rowhit[fm];
2461           } while (fm < currentcol);
2462           /* not in list so add it */
2463           if (fm != currentcol) {
2464             nrows++;
2465             columnsforrow[currentcol] = col;
2466             /* next three lines insert new entry into linked list */
2467             rowhit[mfm]               = currentcol;
2468             rowhit[currentcol]        = fm;
2469             fm                        = currentcol;
2470             /* fm points to present position in list since we know the columns are sorted */
2471           } else {
2472             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2473           }
2474         }
2475       }
2476       c->nrows[i]         = nrows;
2477       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2478       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2479       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2480       /* now store the linked list of rows into c->rows[i] */
2481       nrows = 0;
2482       fm    = rowhit[M];
2483       do {
2484         c->rows[i][nrows]            = fm;
2485         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2486         fm                           = rowhit[fm];
2487       } while (fm < M);
2488     } /* ---------------------------------------------------------------------------------------*/
2489     ierr = PetscFree(cols);CHKERRQ(ierr);
2490   }
2491 
2492   /* Optimize by adding the vscale and vscaleforrow[][] fields */
2493   /*
2494        vscale will contain the on-process ("diagonal" block) scalings followed by the off-process ones
2495   */
2496   if (ctype == IS_COLORING_GLOBAL) {
2497     PetscInt *garray;
2498     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2499     for (i=0; i<baij->B->cmap->n/bs; i++) {
2500       for (j=0; j<bs; j++) {
2501         garray[i*bs+j] = bs*baij->garray[i]+j;
2502       }
2503     }
2504     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2505     ierr = PetscFree(garray);CHKERRQ(ierr);
2506     CHKMEMQ;
2507     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2508     for (k=0; k<c->ncolors; k++) {
2509       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2510       for (l=0; l<c->nrows[k]; l++) {
2511         col = c->columnsforrow[k][l];
2512         if (col >= cstart && col < cend) {
2513           /* column is in diagonal block of matrix */
2514           colb = col - cstart;
2515         } else {
2516           /* column  is in "off-processor" part */
2517 #if defined (PETSC_USE_CTABLE)
2518           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2519           colb --;
2520 #else
2521           colb = baij->colmap[col] - 1;
2522 #endif
2523           colb = colb/bs;
2524           colb += cend - cstart;
2525         }
2526         c->vscaleforrow[k][l] = colb;
2527       }
2528     }
2529   } else if (ctype == IS_COLORING_GHOSTED) {
2530     /* Get gtol mapping */
2531     PetscInt N = mat->cmap->N, *gtol;
2532     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2533     for (i=0; i<N; i++) gtol[i] = -1;
2534     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2535 
2536     c->vscale = 0; /* will be created in MatFDColoringApply() */
2537     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2538     for (k=0; k<c->ncolors; k++) {
2539       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2540       for (l=0; l<c->nrows[k]; l++) {
2541         col = c->columnsforrow[k][l];      /* global column index */
2542         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2543       }
2544     }
2545     ierr = PetscFree(gtol);CHKERRQ(ierr);
2546   }
2547   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2548 
2549   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2550   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2551   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2552   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2553   CHKMEMQ;
2554   PetscFunctionReturn(0);
2555 }
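
/*
   Illustrative note on the routine above: for IS_COLORING_GLOBAL the
   vscaleforrow[][] entry for a global block column col is its local index in the
   ghosted vscale vector,

       colb = col - cstart                            if cstart <= col < cend,
       colb = (colmap[col]-1)/bs + (cend - cstart)    otherwise,

   i.e. the on-process block columns come first, followed by the ghost block
   columns listed in garray.
*/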
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2559 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2560 {
2561   Mat            B;
2562   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2563   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2564   Mat_SeqAIJ     *b;
2565   PetscErrorCode ierr;
2566   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2567   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2568   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2569 
2570   PetscFunctionBegin;
2571   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2572   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2573 
2574   /* ----------------------------------------------------------------
2575      Tell every processor the number of nonzeros per row
2576   */
2577   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2578   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2579     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2580   }
2581   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2582   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2583   displs     = recvcounts + size;
2584   for (i=0; i<size; i++) {
2585     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2586     displs[i]     = A->rmap->range[i]/bs;
2587   }
2588 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2589   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2590 #else
2591   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2592 #endif
2593   /* ---------------------------------------------------------------
2594      Create the sequential matrix of the same type as the local block diagonal
2595   */
2596   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2597   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2598   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2599   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2600   b = (Mat_SeqAIJ *)B->data;
2601 
2602   /*--------------------------------------------------------------------
2603     Copy my part of matrix column indices over
2604   */
2605   sendcount  = ad->nz + bd->nz;
2606   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2607   a_jsendbuf = ad->j;
2608   b_jsendbuf = bd->j;
2609   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2610   cnt        = 0;
2611   for (i=0; i<n; i++) {
2612 
2613     /* put in lower diagonal portion */
2614     m = bd->i[i+1] - bd->i[i];
2615     while (m > 0) {
2616       /* is it above diagonal (in bd (compressed) numbering) */
2617       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2618       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2619       m--;
2620     }
2621 
2622     /* put in diagonal portion */
2623     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2624       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2625     }
2626 
2627     /* put in upper diagonal portion */
2628     while (m-- > 0) {
2629       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2630     }
2631   }
2632   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2633 
2634   /*--------------------------------------------------------------------
2635     Gather all column indices to all processors
2636   */
2637   for (i=0; i<size; i++) {
2638     recvcounts[i] = 0;
2639     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2640       recvcounts[i] += lens[j];
2641     }
2642   }
2643   displs[0]  = 0;
2644   for (i=1; i<size; i++) {
2645     displs[i] = displs[i-1] + recvcounts[i-1];
2646   }
2647 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2648   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2649 #else
2650   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2651 #endif
2652   /*--------------------------------------------------------------------
2653     Assemble the matrix into usable form (note the numerical values are not yet set)
2654   */
2655   /* set the b->ilen (length of each row) values */
2656   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2657   /* set the b->i indices */
2658   b->i[0] = 0;
2659   for (i=1; i<=A->rmap->N/bs; i++) {
2660     b->i[i] = b->i[i-1] + lens[i-1];
2661   }
2662   ierr = PetscFree(lens);CHKERRQ(ierr);
2663   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2664   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2665   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2666 
2667   if (A->symmetric){
2668     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2669   } else if (A->hermitian) {
2670     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2671   } else if (A->structurally_symmetric) {
2672     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2673   }
2674   *newmat = B;
2675   PetscFunctionReturn(0);
2676 }
2677 
2678 #undef __FUNCT__
2679 #define __FUNCT__ "MatSOR_MPIBAIJ"
2680 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2681 {
2682   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2683   PetscErrorCode ierr;
2684   Vec            bb1 = 0;
2685 
2686   PetscFunctionBegin;
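  /* a work vector bb1 = bb - B*x is needed unless a single sweep is performed with a zero initial guess */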
2687   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2688     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2689   }
2690 
2691   if (flag == SOR_APPLY_UPPER) {
2692     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2693     PetscFunctionReturn(0);
2694   }
2695 
2696   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2697     if (flag & SOR_ZERO_INITIAL_GUESS) {
2698       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2699       its--;
2700     }
2701 
2702     while (its--) {
2703       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2704       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2705 
2706       /* update rhs: bb1 = bb - B*x */
2707       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2708       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2709 
2710       /* local sweep */
2711       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2712     }
2713   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2714     if (flag & SOR_ZERO_INITIAL_GUESS) {
2715       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2716       its--;
2717     }
2718     while (its--) {
2719       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2721 
2722       /* update rhs: bb1 = bb - B*x */
2723       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2724       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2725 
2726       /* local sweep */
2727       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2728     }
2729   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2730     if (flag & SOR_ZERO_INITIAL_GUESS) {
2731       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2732       its--;
2733     }
2734     while (its--) {
2735       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2736       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2737 
2738       /* update rhs: bb1 = bb - B*x */
2739       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2740       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2741 
2742       /* local sweep */
2743       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2744     }
2745   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR variant requested is not supported");
2746 
2747   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2752 
2753 #undef __FUNCT__
2754 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2755 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values)
2756 {
2757   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2758   PetscErrorCode ierr;
2759 
2760   PetscFunctionBegin;
2761   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 
2766 /* -------------------------------------------------------------------*/
2767 static struct _MatOps MatOps_Values = {
2768        MatSetValues_MPIBAIJ,
2769        MatGetRow_MPIBAIJ,
2770        MatRestoreRow_MPIBAIJ,
2771        MatMult_MPIBAIJ,
2772 /* 4*/ MatMultAdd_MPIBAIJ,
2773        MatMultTranspose_MPIBAIJ,
2774        MatMultTransposeAdd_MPIBAIJ,
2775        0,
2776        0,
2777        0,
2778 /*10*/ 0,
2779        0,
2780        0,
2781        MatSOR_MPIBAIJ,
2782        MatTranspose_MPIBAIJ,
2783 /*15*/ MatGetInfo_MPIBAIJ,
2784        MatEqual_MPIBAIJ,
2785        MatGetDiagonal_MPIBAIJ,
2786        MatDiagonalScale_MPIBAIJ,
2787        MatNorm_MPIBAIJ,
2788 /*20*/ MatAssemblyBegin_MPIBAIJ,
2789        MatAssemblyEnd_MPIBAIJ,
2790        MatSetOption_MPIBAIJ,
2791        MatZeroEntries_MPIBAIJ,
2792 /*24*/ MatZeroRows_MPIBAIJ,
2793        0,
2794        0,
2795        0,
2796        0,
2797 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2798        0,
2799        0,
2800        0,
2801        0,
2802 /*34*/ MatDuplicate_MPIBAIJ,
2803        0,
2804        0,
2805        0,
2806        0,
2807 /*39*/ MatAXPY_MPIBAIJ,
2808        MatGetSubMatrices_MPIBAIJ,
2809        MatIncreaseOverlap_MPIBAIJ,
2810        MatGetValues_MPIBAIJ,
2811        MatCopy_MPIBAIJ,
2812 /*44*/ 0,
2813        MatScale_MPIBAIJ,
2814        0,
2815        0,
2816        0,
2817 /*49*/ MatSetBlockSize_MPIBAIJ,
2818        0,
2819        0,
2820        0,
2821        0,
2822 /*54*/ MatFDColoringCreate_MPIBAIJ,
2823        0,
2824        MatSetUnfactored_MPIBAIJ,
2825        MatPermute_MPIBAIJ,
2826        MatSetValuesBlocked_MPIBAIJ,
2827 /*59*/ MatGetSubMatrix_MPIBAIJ,
2828        MatDestroy_MPIBAIJ,
2829        MatView_MPIBAIJ,
2830        0,
2831        0,
2832 /*64*/ 0,
2833        0,
2834        0,
2835        0,
2836        0,
2837 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2838        0,
2839        0,
2840        0,
2841        0,
2842 /*74*/ 0,
2843        MatFDColoringApply_BAIJ,
2844        0,
2845        0,
2846        0,
2847 /*79*/ 0,
2848        0,
2849        0,
2850        0,
2851        MatLoad_MPIBAIJ,
2852 /*84*/ 0,
2853        0,
2854        0,
2855        0,
2856        0,
2857 /*89*/ 0,
2858        0,
2859        0,
2860        0,
2861        0,
2862 /*94*/ 0,
2863        0,
2864        0,
2865        0,
2866        0,
2867 /*99*/ 0,
2868        0,
2869        0,
2870        0,
2871        0,
2872 /*104*/0,
2873        MatRealPart_MPIBAIJ,
2874        MatImaginaryPart_MPIBAIJ,
2875        0,
2876        0,
2877 /*109*/0,
2878        0,
2879        0,
2880        0,
2881        0,
2882 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2883        0,
2884        MatGetGhosts_MPIBAIJ,
2885        0,
2886        0,
2887 /*119*/0,
2888        0,
2889        0,
2890        0,
2891        0,
2892 /*124*/0,
2893        0,
2894        MatInvertBlockDiagonal_MPIBAIJ
2895 };
2896 
2897 EXTERN_C_BEGIN
2898 #undef __FUNCT__
2899 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2900 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2901 {
2902   PetscFunctionBegin;
2903   *a = ((Mat_MPIBAIJ *)A->data)->A;
2904   PetscFunctionReturn(0);
2905 }
2906 EXTERN_C_END
2907 
2908 EXTERN_C_BEGIN
2909 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2910 EXTERN_C_END
2911 
2912 EXTERN_C_BEGIN
2913 #undef __FUNCT__
2914 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2915 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2916 {
2917   PetscInt       m,rstart,cstart,cend;
2918   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2919   const PetscInt *JJ=0;
2920   PetscScalar    *values=0;
2921   PetscErrorCode ierr;
2922 
2923   PetscFunctionBegin;
2924 
2925   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2926   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2927   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2928   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2929   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2930   m      = B->rmap->n/bs;
2931   rstart = B->rmap->rstart/bs;
2932   cstart = B->cmap->rstart/bs;
2933   cend   = B->cmap->rend/bs;
2934 
2935   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2936   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2937   for (i=0; i<m; i++) {
2938     nz = ii[i+1] - ii[i];
2939     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2940     nz_max = PetscMax(nz_max,nz);
2941     JJ  = jj + ii[i];
2942     for (j=0; j<nz; j++) {
2943       if (*JJ >= cstart) break;
2944       JJ++;
2945     }
2946     d = 0;
2947     for (; j<nz; j++) {
2948       if (*JJ++ >= cend) break;
2949       d++;
2950     }
2951     d_nnz[i] = d;
2952     o_nnz[i] = nz - d;
2953   }
2954   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2955   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2956 
2957   values = (PetscScalar*)V;
2958   if (!values) {
2959     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2960     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2961   }
2962   for (i=0; i<m; i++) {
2963     PetscInt          row    = i + rstart;
2964     PetscInt          ncols  = ii[i+1] - ii[i];
2965     const PetscInt    *icols = jj + ii[i];
2966     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2967     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2968   }
2969 
2970   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2971   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2972   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2973   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2974   PetscFunctionReturn(0);
2975 }
2976 EXTERN_C_END
2977 
2978 #undef __FUNCT__
2979 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2980 /*@C
2981    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2982    (the default parallel PETSc format).
2983 
2984    Collective on MPI_Comm
2985 
2986    Input Parameters:
2987 +  A - the matrix
2988 .  bs - the block size
2989 .  i - the indices into j for the start of each local row (starts with zero)
2990 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2991 -  v - optional values in the matrix
2992 
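   Example:
   An illustrative sketch (not taken from the PETSc manual) for two local block rows with
   block size 2; B is assumed to already have its type and sizes set, and the global block
   columns referenced are assumed to lie in this process's diagonal portion:
.vb
   PetscInt ii[3] = {0,2,3};   /* block row 0 has 2 blocks, block row 1 has 1 block */
   PetscInt jj[3] = {0,1,1};   /* sorted global block column indices for each row   */
   ierr = MatMPIBAIJSetPreallocationCSR(B,2,ii,jj,PETSC_NULL);CHKERRQ(ierr);
.ve
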
2993    Level: developer
2994 
2995 .keywords: matrix, aij, compressed row, sparse, parallel
2996 
2997 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2998 @*/
2999 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3000 {
3001   PetscErrorCode ierr;
3002 
3003   PetscFunctionBegin;
3004   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3005   PetscValidType(B,1);
3006   PetscValidLogicalCollectiveInt(B,bs,2);
3007   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3008   PetscFunctionReturn(0);
3009 }
3010 
3011 EXTERN_C_BEGIN
3012 #undef __FUNCT__
3013 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3014 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3015 {
3016   Mat_MPIBAIJ    *b;
3017   PetscErrorCode ierr;
3018   PetscInt       i, newbs = PetscAbs(bs);
3019   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3020 
3021   PetscFunctionBegin;
3022   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3023   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3024   if (bs < 0) {
3025     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3026       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3027     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3028     bs   = PetscAbs(bs);
3029   }
3030   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3031   bs = newbs;
3032 
3033 
3034   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3035   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3036   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3037   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3038   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3039 
3040   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3041   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3042   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3043   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3044 
3045   if (d_nnz) {
3046     for (i=0; i<B->rmap->n/bs; i++) {
3047       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3048     }
3049   }
3050   if (o_nnz) {
3051     for (i=0; i<B->rmap->n/bs; i++) {
3052       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3053     }
3054   }
3055 
3056   b = (Mat_MPIBAIJ*)B->data;
3057   b->bs2 = bs*bs;
3058   b->mbs = B->rmap->n/bs;
3059   b->nbs = B->cmap->n/bs;
3060   b->Mbs = B->rmap->N/bs;
3061   b->Nbs = B->cmap->N/bs;
3062 
3063   for (i=0; i<=b->size; i++) {
3064     b->rangebs[i] = B->rmap->range[i]/bs;
3065   }
3066   b->rstartbs = B->rmap->rstart/bs;
3067   b->rendbs   = B->rmap->rend/bs;
3068   b->cstartbs = B->cmap->rstart/bs;
3069   b->cendbs   = B->cmap->rend/bs;
3070 
3071   if (!B->preallocated) {
3072     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3073     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3074     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3075     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3076     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3077     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3078     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3079     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3080     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3081   }
3082 
3083   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3084   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3085   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3086   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3087   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3088   B->preallocated = PETSC_TRUE;
3089   PetscFunctionReturn(0);
3090 }
3091 EXTERN_C_END
3092 
3093 EXTERN_C_BEGIN
3094 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3095 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3096 EXTERN_C_END
3097 
3098 
3099 EXTERN_C_BEGIN
3100 #undef __FUNCT__
3101 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3102 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3103 {
3104   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3105   PetscErrorCode ierr;
3106   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3107   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3108   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3109 
3110   PetscFunctionBegin;
3111   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3112   ii[0] = 0;
3113   CHKMEMQ;
3114   for (i=0; i<M; i++) {
3115     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3116     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3117     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3118     /* remove one from the count if the matrix row contains a diagonal entry */
3119     for (j=id[i]; j<id[i+1]; j++) {
3120       if (jd[j] == i) {ii[i+1]--;break;}
3121     }
3122   CHKMEMQ;
3123   }
3124   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3125   cnt = 0;
3126   for (i=0; i<M; i++) {
3127     for (j=io[i]; j<io[i+1]; j++) {
3128       if (garray[jo[j]] > rstart) break;
3129       jj[cnt++] = garray[jo[j]];
3130   CHKMEMQ;
3131     }
3132     for (k=id[i]; k<id[i+1]; k++) {
3133       if (jd[k] != i) {
3134         jj[cnt++] = rstart + jd[k];
3135   CHKMEMQ;
3136       }
3137     }
3138     for (;j<io[i+1]; j++) {
3139       jj[cnt++] = garray[jo[j]];
3140   CHKMEMQ;
3141     }
3142   }
3143   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3144   PetscFunctionReturn(0);
3145 }
3146 EXTERN_C_END
3147 
3148 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3149 EXTERN_C_BEGIN
3150 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3151 EXTERN_C_END
3152 
3153 EXTERN_C_BEGIN
3154 #undef __FUNCT__
3155 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3156 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3157 {
3158   PetscErrorCode ierr;
3159   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3160   Mat            B;
3161   Mat_MPIAIJ     *b;
3162 
3163   PetscFunctionBegin;
3164   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3165 
3166   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3167   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3168   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3169   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3170   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3171   b = (Mat_MPIAIJ*) B->data;
3172 
3173   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3174   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3175   ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3176   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3177   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3178   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3179   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3180   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3181   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3182   if (reuse == MAT_REUSE_MATRIX) {
3183     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3184   } else {
3185    *newmat = B;
3186   }
3187   PetscFunctionReturn(0);
3188 }
3189 EXTERN_C_END
3190 
3191 EXTERN_C_BEGIN
3192 #if defined(PETSC_HAVE_MUMPS)
3193 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3194 #endif
3195 EXTERN_C_END
3196 
3197 /*MC
3198    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3199 
3200    Options Database Keys:
3201 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3202 . -mat_block_size <bs> - set the blocksize used to store the matrix
3203 - -mat_use_hash_table <fact> - set the factor used to compute the size of the hash table
3204 
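  Example:
  An illustrative command line (./ex1 is a placeholder executable name):
.vb
   mpiexec -n 4 ./ex1 -mat_type mpibaij -mat_block_size 3
.ve
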
3205   Level: beginner
3206 
3207 .seealso: MatCreateMPIBAIJ()
3208 M*/
3209 
3210 EXTERN_C_BEGIN
3211 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
3212 EXTERN_C_END
3213 
3214 EXTERN_C_BEGIN
3215 #undef __FUNCT__
3216 #define __FUNCT__ "MatCreate_MPIBAIJ"
3217 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3218 {
3219   Mat_MPIBAIJ    *b;
3220   PetscErrorCode ierr;
3221   PetscBool      flg;
3222 
3223   PetscFunctionBegin;
3224   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3225   B->data = (void*)b;
3226 
3227   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3228   B->assembled  = PETSC_FALSE;
3229 
3230   B->insertmode = NOT_SET_VALUES;
3231   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3232   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3233 
3234   /* build local table of row and column ownerships */
3235   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3236 
3237   /* build cache for off array entries formed */
3238   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3239   b->donotstash  = PETSC_FALSE;
3240   b->colmap      = PETSC_NULL;
3241   b->garray      = PETSC_NULL;
3242   b->roworiented = PETSC_TRUE;
3243 
3244   /* stuff used in block assembly */
3245   b->barray       = 0;
3246 
3247   /* stuff used for matrix vector multiply */
3248   b->lvec         = 0;
3249   b->Mvctx        = 0;
3250 
3251   /* stuff for MatGetRow() */
3252   b->rowindices   = 0;
3253   b->rowvalues    = 0;
3254   b->getrowactive = PETSC_FALSE;
3255 
3256   /* hash table stuff */
3257   b->ht           = 0;
3258   b->hd           = 0;
3259   b->ht_size      = 0;
3260   b->ht_flag      = PETSC_FALSE;
3261   b->ht_fact      = 0;
3262   b->ht_total_ct  = 0;
3263   b->ht_insert_ct = 0;
3264 
3265   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3266   b->ijonly       = PETSC_FALSE;
3267 
3268   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3269     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3270     if (flg) {
3271       PetscReal fact = 1.39;
3272       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3273       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3274       if (fact <= 1.0) fact = 1.39;
3275       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3276       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3277     }
3278   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3279 
3280 #if defined(PETSC_HAVE_MUMPS)
3281   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3282 #endif
3283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3284                                      "MatConvert_MPIBAIJ_MPIAdj",
3285                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3286   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3287                                      "MatConvert_MPIBAIJ_MPIAIJ",
3288                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3289   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3290                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3291                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3292   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3293                                      "MatStoreValues_MPIBAIJ",
3294                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3295   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3296                                      "MatRetrieveValues_MPIBAIJ",
3297                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3298   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3299                                      "MatGetDiagonalBlock_MPIBAIJ",
3300                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3301   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3302                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3303                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3304   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3305 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3306 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3307   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3308                                      "MatDiagonalScaleLocal_MPIBAIJ",
3309                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3310   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3311                                      "MatSetHashTableFactor_MPIBAIJ",
3312                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3313   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3314                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3315                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3316   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3317   PetscFunctionReturn(0);
3318 }
3319 EXTERN_C_END
3320 
3321 /*MC
3322    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3323 
3324    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3325    and MATMPIBAIJ otherwise.
3326 
3327    Options Database Keys:
3328 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3329 
3330   Level: beginner
3331 
3332 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3333 M*/
3334 
3335 #undef __FUNCT__
3336 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3337 /*@C
3338    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3339    (block compressed row).  For good matrix assembly performance
3340    the user should preallocate the matrix storage by setting the parameters
3341    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3342    performance can be increased by more than a factor of 50.
3343 
3344    Collective on Mat
3345 
3346    Input Parameters:
3347 +  A - the matrix
3348 .  bs   - size of block
3349 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3350            submatrix  (same for all local rows)
3351 .  d_nnz - array containing the number of block nonzeros in the various block rows
3352            of the diagonal portion of the local submatrix (possibly different for each block
3353            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3354            set it even if it is zero.
3355 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3356            submatrix (same for all local rows).
3357 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3358            off-diagonal portion of the local submatrix (possibly different for
3359            each block row) or PETSC_NULL.
3360 
3361    If the *_nnz parameter is given then the *_nz parameter is ignored
3362 
3363    Options Database Keys:
3364 +   -mat_block_size - size of the blocks to use
3365 -   -mat_use_hash_table <fact> - set the factor used to compute the size of the hash table
3366 
3367    Notes:
3368    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3369    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3370    then it must be used on all processors that share the object for that argument.
3371    Storage Information:
3372    For a square global matrix we define each processor's diagonal portion
3373    to be its local rows and the corresponding columns (a square submatrix);
3374    each processor's off-diagonal portion encompasses the remainder of the
3375    local matrix (a rectangular submatrix).
3376 
3377    The user can specify preallocated storage for the diagonal part of
3378    the local submatrix with either d_nz or d_nnz (not both).  Set
3379    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3380    memory allocation.  Likewise, specify preallocated storage for the
3381    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3382 
3383    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3384    the figure below we depict these three local rows and all columns (0-11).
3385 
3386 .vb
3387            0 1 2 3 4 5 6 7 8 9 10 11
3388           -------------------
3389    row 3  |  o o o d d d o o o o o o
3390    row 4  |  o o o d d d o o o o o o
3391    row 5  |  o o o d d d o o o o o o
3392           -------------------
3393 .ve
3394 
3395    Thus, any entries in the d locations are stored in the d (diagonal)
3396    submatrix, and any entries in the o locations are stored in the
3397    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3398    stored simply in the MATSEQBAIJ format for compressed row storage.
3399 
3400    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3401    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3402    In general, for PDE problems in which most nonzeros are near the diagonal,
3403    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3404    or you will get TERRIBLE performance; see the users' manual chapter on
3405    matrices.
3406 
3407    You can call MatGetInfo() to get information on how effective the preallocation was;
3408    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3409    You can also run with the option -info and look for messages with the string
3410    malloc in them to see if additional memory allocation was needed.
3411 
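   Example:
   An illustrative sketch (not taken from the PETSc manual); comm and ierr are assumed to be
   declared elsewhere, and constant per-block-row preallocation is used for simplicity:
.vb
   Mat      A;
   PetscInt bs = 2, mlocal = 10;    /* 10 local rows = 5 local block rows */
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve
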
3412    Level: intermediate
3413 
3414 .keywords: matrix, block, aij, compressed row, sparse, parallel
3415 
3416 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3417 @*/
3418 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3419 {
3420   PetscErrorCode ierr;
3421 
3422   PetscFunctionBegin;
3423   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3424   PetscValidType(B,1);
3425   PetscValidLogicalCollectiveInt(B,bs,2);
3426   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3427   PetscFunctionReturn(0);
3428 }
3429 
3430 #undef __FUNCT__
3431 #define __FUNCT__ "MatCreateMPIBAIJ"
3432 /*@C
3433    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3434    (block compressed row).  For good matrix assembly performance
3435    the user should preallocate the matrix storage by setting the parameters
3436    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3437    performance can be increased by more than a factor of 50.
3438 
3439    Collective on MPI_Comm
3440 
3441    Input Parameters:
3442 +  comm - MPI communicator
3443 .  bs   - size of block
3444 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3445            This value should be the same as the local size used in creating the
3446            y vector for the matrix-vector product y = Ax.
3447 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3448            This value should be the same as the local size used in creating the
3449            x vector for the matrix-vector product y = Ax.
3450 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3451 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3452 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3453            submatrix  (same for all local rows)
3454 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3455            of the diagonal portion of the local submatrix (possibly different for each block
3456            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3457            and set it even if it is zero.
3458 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3459            submatrix (same for all local rows).
3460 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3461            off-diagonal portion of the local submatrix (possibly different for
3462            each block row) or PETSC_NULL.
3463 
3464    Output Parameter:
3465 .  A - the matrix
3466 
3467    Options Database Keys:
3468 +   -mat_block_size - size of the blocks to use
3469 -   -mat_use_hash_table <fact> - set the factor used to compute the size of the hash table
3470 
3471    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3472    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3473    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3474 
3475    Notes:
3476    If the *_nnz parameter is given then the *_nz parameter is ignored
3477 
3478    A nonzero block is any block that has 1 or more nonzeros in it
3479 
3480    The user MUST specify either the local or global matrix dimensions
3481    (possibly both).
3482 
3483    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3484    then it must be used on all processors that share the object for that argument.
3485 
3486    Storage Information:
3487    For a square global matrix we define each processor's diagonal portion
3488    to be its local rows and the corresponding columns (a square submatrix);
3489    each processor's off-diagonal portion encompasses the remainder of the
3490    local matrix (a rectangular submatrix).
3491 
3492    The user can specify preallocated storage for the diagonal part of
3493    the local submatrix with either d_nz or d_nnz (not both).  Set
3494    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3495    memory allocation.  Likewise, specify preallocated storage for the
3496    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3497 
3498    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3499    the figure below we depict these three local rows and all columns (0-11).
3500 
3501 .vb
3502            0 1 2 3 4 5 6 7 8 9 10 11
3503           -------------------
3504    row 3  |  o o o d d d o o o o o o
3505    row 4  |  o o o d d d o o o o o o
3506    row 5  |  o o o d d d o o o o o o
3507           -------------------
3508 .ve
3509 
3510    Thus, any entries in the d locations are stored in the d (diagonal)
3511    submatrix, and any entries in the o locations are stored in the
3512    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3513    stored simply in the MATSEQBAIJ format for compressed row storage.
3514 
3515    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3516    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3517    In general, for PDE problems in which most nonzeros are near the diagonal,
3518    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3519    or you will get TERRIBLE performance; see the users' manual chapter on
3520    matrices.
3521 
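   Example:
   An illustrative sketch (not taken from the PETSc manual); comm and ierr are assumed to be
   declared elsewhere, the global size 300 is divisible by the block size 3, and constant
   per-block-row preallocation is used:
.vb
   Mat B;
   ierr = MatCreateMPIBAIJ(comm,3,PETSC_DECIDE,PETSC_DECIDE,300,300,4,PETSC_NULL,2,PETSC_NULL,&B);CHKERRQ(ierr);
.ve
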
3522    Level: intermediate
3523 
3524 .keywords: matrix, block, aij, compressed row, sparse, parallel
3525 
3526 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3527 @*/
3528 PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3529 {
3530   PetscErrorCode ierr;
3531   PetscMPIInt    size;
3532 
3533   PetscFunctionBegin;
3534   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3535   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3536   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3537   if (size > 1) {
3538     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3539     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3540   } else {
3541     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3542     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3543   }
3544   PetscFunctionReturn(0);
3545 }
3546 
3547 #undef __FUNCT__
3548 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3549 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3550 {
3551   Mat            mat;
3552   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3553   PetscErrorCode ierr;
3554   PetscInt       len=0;
3555 
3556   PetscFunctionBegin;
3557   *newmat       = 0;
3558   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3559   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3560   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3561   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3562 
3563   mat->factortype   = matin->factortype;
3564   mat->preallocated = PETSC_TRUE;
3565   mat->assembled    = PETSC_TRUE;
3566   mat->insertmode   = NOT_SET_VALUES;
3567 
3568   a      = (Mat_MPIBAIJ*)mat->data;
3569   mat->rmap->bs  = matin->rmap->bs;
3570   a->bs2   = oldmat->bs2;
3571   a->mbs   = oldmat->mbs;
3572   a->nbs   = oldmat->nbs;
3573   a->Mbs   = oldmat->Mbs;
3574   a->Nbs   = oldmat->Nbs;
3575 
3576   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3577   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3578 
3579   a->size         = oldmat->size;
3580   a->rank         = oldmat->rank;
3581   a->donotstash   = oldmat->donotstash;
3582   a->roworiented  = oldmat->roworiented;
3583   a->rowindices   = 0;
3584   a->rowvalues    = 0;
3585   a->getrowactive = PETSC_FALSE;
3586   a->barray       = 0;
3587   a->rstartbs     = oldmat->rstartbs;
3588   a->rendbs       = oldmat->rendbs;
3589   a->cstartbs     = oldmat->cstartbs;
3590   a->cendbs       = oldmat->cendbs;
3591 
3592   /* hash table stuff */
3593   a->ht           = 0;
3594   a->hd           = 0;
3595   a->ht_size      = 0;
3596   a->ht_flag      = oldmat->ht_flag;
3597   a->ht_fact      = oldmat->ht_fact;
3598   a->ht_total_ct  = 0;
3599   a->ht_insert_ct = 0;
3600 
3601   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3602   if (oldmat->colmap) {
3603 #if defined (PETSC_USE_CTABLE)
3604   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3605 #else
3606   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3607   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3608   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3609 #endif
3610   } else a->colmap = 0;
3611 
3612   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3613     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3614     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3615     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3616   } else a->garray = 0;
3617 
3618   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3619   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3620   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3621   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3622   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3623 
3624   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3625   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3626   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3627   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3628   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3629   *newmat = mat;
3630 
3631   PetscFunctionReturn(0);
3632 }
3633 
3634 #undef __FUNCT__
3635 #define __FUNCT__ "MatLoad_MPIBAIJ"
3636 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3637 {
3638   PetscErrorCode ierr;
3639   int            fd;
3640   PetscInt       i,nz,j,rstart,rend;
3641   PetscScalar    *vals,*buf;
3642   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3643   MPI_Status     status;
3644   PetscMPIInt    rank,size,maxnz;
3645   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3646   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3647   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3648   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3649   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3650   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3651 
3652   PetscFunctionBegin;
3653   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3654     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3655   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3656 
3657   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3658   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3659   if (!rank) {
3660     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3661     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3662     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3663   }
3664 
3665   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3666 
3667   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3668   M = header[1]; N = header[2];
3669 
3670   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3671   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3672   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3673 
3674   /* If global sizes are set, check if they are consistent with that given in the file */
3675   if (sizesset) {
3676     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3677   }
3678   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3679   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3680 
3681   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3682 
3683   /*
3684      This code adds extra rows to make sure the number of rows is
3685      divisible by the blocksize
3686   */
3687   Mbs        = M/bs;
3688   extra_rows = bs - M + bs*Mbs;
3689   if (extra_rows == bs) extra_rows = 0;
3690   else                  Mbs++;
3691   if (extra_rows && !rank) {
3692     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3693   }
3694 
3695   /* determine ownership of all rows */
3696   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3697     mbs        = Mbs/size + ((Mbs % size) > rank);
3698     m          = mbs*bs;
3699   } else { /* User set */
3700     m          = newmat->rmap->n;
3701     mbs        = m/bs;
3702   }
3703   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3704   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3705 
3706   /* process 0 needs enough room for process with most rows */
3707   if (!rank) {
3708     mmax = rowners[1];
3709     for (i=2; i<size; i++) {
3710       mmax = PetscMax(mmax,rowners[i]);
3711     }
3712     mmax*=bs;
3713   } else mmax = m;
3714 
3715   rowners[0] = 0;
3716   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3717   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3718   rstart = rowners[rank];
3719   rend   = rowners[rank+1];
3720 
3721   /* distribute row lengths to all processors */
3722   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3723   if (!rank) {
3724     mend = m;
3725     if (size == 1) mend = mend - extra_rows;
3726     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3727     for (j=mend; j<m; j++) locrowlens[j] = 1;
3728     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3729     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3730     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3731     for (j=0; j<m; j++) {
3732       procsnz[0] += locrowlens[j];
3733     }
3734     for (i=1; i<size; i++) {
3735       mend = browners[i+1] - browners[i];
3736       if (i == size-1) mend = mend - extra_rows;
3737       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3738       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3739       /* calculate the number of nonzeros on each processor */
3740       for (j=0; j<browners[i+1]-browners[i]; j++) {
3741         procsnz[i] += rowlengths[j];
3742       }
3743       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3744     }
3745     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3746   } else {
3747     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3748   }
3749 
3750   if (!rank) {
3751     /* determine max buffer needed and allocate it */
3752     maxnz = procsnz[0];
3753     for (i=1; i<size; i++) {
3754       maxnz = PetscMax(maxnz,procsnz[i]);
3755     }
3756     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3757 
3758     /* read in my part of the matrix column indices  */
3759     nz     = procsnz[0];
3760     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3761     mycols = ibuf;
3762     if (size == 1)  nz -= extra_rows;
3763     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3764     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3765 
3766     /* read in the other processes' column indices (except the last process's) and ship them off */
3767     for (i=1; i<size-1; i++) {
3768       nz   = procsnz[i];
3769       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3770       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3771     }
3772     /* read in the stuff for the last proc */
3773     if (size != 1) {
3774       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3775       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3776       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3777       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3778     }
3779     ierr = PetscFree(cols);CHKERRQ(ierr);
3780   } else {
3781     /* determine buffer space needed for message */
3782     nz = 0;
3783     for (i=0; i<m; i++) {
3784       nz += locrowlens[i];
3785     }
3786     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3787     mycols = ibuf;
3788     /* receive message of column indices*/
3789     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3790     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3791     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3792   }
3793 
3794   /* loop over local rows, determining number of off diagonal entries */
3795   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3796   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3797   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3798   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3799   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3800   rowcount = 0; nzcount = 0;
3801   for (i=0; i<mbs; i++) {
3802     dcount  = 0;
3803     odcount = 0;
3804     for (j=0; j<bs; j++) {
3805       kmax = locrowlens[rowcount];
3806       for (k=0; k<kmax; k++) {
3807         tmp = mycols[nzcount++]/bs;
3808         if (!mask[tmp]) {
3809           mask[tmp] = 1;
3810           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3811           else masked1[dcount++] = tmp;
3812         }
3813       }
3814       rowcount++;
3815     }
3816 
3817     dlens[i]  = dcount;
3818     odlens[i] = odcount;
3819 
3820     /* zero out the mask elements we set */
3821     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3822     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3823   }
3824 
3825 
3826   if (!sizesset) {
3827     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3828   }
3829   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3830 
3831   if (!rank) {
3832     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3833     /* read in my part of the matrix numerical values  */
3834     nz = procsnz[0];
3835     vals = buf;
3836     mycols = ibuf;
3837     if (size == 1)  nz -= extra_rows;
3838     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3839     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3840 
3841     /* insert into matrix */
3842     jj      = rstart*bs;
3843     for (i=0; i<m; i++) {
3844       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3845       mycols += locrowlens[i];
3846       vals   += locrowlens[i];
3847       jj++;
3848     }
3849     /* read in other processors (except the last one) and ship out */
3850     for (i=1; i<size-1; i++) {
3851       nz   = procsnz[i];
3852       vals = buf;
3853       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3854       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3855     }
3856     /* the last proc */
3857     if (size != 1){
3858       nz   = procsnz[i] - extra_rows;
3859       vals = buf;
3860       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3861       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3862       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3863     }
3864     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3865   } else {
3866     /* receive numeric values */
3867     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3868 
3869     /* receive message of values*/
3870     vals   = buf;
3871     mycols = ibuf;
3872     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3873     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3874     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3875 
3876     /* insert into matrix */
3877     jj      = rstart*bs;
3878     for (i=0; i<m; i++) {
3879       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3880       mycols += locrowlens[i];
3881       vals   += locrowlens[i];
3882       jj++;
3883     }
3884   }
3885   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3886   ierr = PetscFree(buf);CHKERRQ(ierr);
3887   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3888   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3889   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3890   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3891   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3892   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3893 
3894   PetscFunctionReturn(0);
3895 }
3896 
3897 #undef __FUNCT__
3898 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3899 /*@
3900    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3901 
3902    Input Parameters:
3903 +  mat  - the matrix
3904 -  fact - factor
3905 
3906    Not Collective, each process can use a different factor
3907 
3908    Level: advanced
3909 
3910   Notes:
3911    This can also be set by the command line option: -mat_use_hash_table <fact>
3912 
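  Example:
   An illustrative sketch (not taken from the PETSc manual); mat and ierr are assumed to be
   declared elsewhere and mat is an MPIBAIJ matrix:
.vb
   ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(mat,1.6);CHKERRQ(ierr);
.ve
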
3913 .keywords: matrix, hashtable, factor, HT
3914 
3915 .seealso: MatSetOption()
3916 @*/
3917 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3918 {
3919   PetscErrorCode ierr;
3920 
3921   PetscFunctionBegin;
3922   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3923   PetscFunctionReturn(0);
3924 }
3925 
3926 EXTERN_C_BEGIN
3927 #undef __FUNCT__
3928 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3929 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3930 {
3931   Mat_MPIBAIJ *baij;
3932 
3933   PetscFunctionBegin;
3934   baij = (Mat_MPIBAIJ*)mat->data;
3935   baij->ht_fact = fact;
3936   PetscFunctionReturn(0);
3937 }
3938 EXTERN_C_END
3939 
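/*
   MatMPIBAIJGetSeqBAIJ - returns the local diagonal part (Ad) and off-diagonal part (Ao)
   of an MPIBAIJ matrix, along with the array (colmap) of global block column indices for
   the columns of Ao; no new references are taken, so the caller should not destroy them.
*/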
3940 #undef __FUNCT__
3941 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3942 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3943 {
3944   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3945   PetscFunctionBegin;
3946   *Ad     = a->A;
3947   *Ao     = a->B;
3948   *colmap = a->garray;
3949   PetscFunctionReturn(0);
3950 }
3951 
3952 /*
3953     Special version for direct calls from Fortran (to eliminate two levels of function-call overhead)
3954 */
3955 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3956 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3957 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3958 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3959 #endif
3960 
3961 #undef __FUNCT__
3962 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3963 /*@C
3964   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3965 
3966   Collective on Mat
3967 
3968   Input Parameters:
3969 + mat - the matrix
3970 . min - number of input rows
3971 . im - input rows
3972 . nin - number of input columns
3973 . in - input columns
3974 . v - numerical values input
3975 - addvin - INSERT_VALUES or ADD_VALUES
3976 
3977   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is unfortunate code duplication rather than reuse.
3978 
3979   Level: advanced
3980 
3981 .seealso:   MatSetValuesBlocked()
3982 @*/
3983 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3984 {
3985   /* convert input arguments to C version */
3986   Mat             mat = *matin;
3987   PetscInt        m = *min, n = *nin;
3988   InsertMode      addv = *addvin;
3989 
3990   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3991   const MatScalar *value;
3992   MatScalar       *barray=baij->barray;
3993   PetscBool       roworiented = baij->roworiented;
3994   PetscErrorCode  ierr;
3995   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3996   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3997   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3998 
3999   PetscFunctionBegin;
4000   /* tasks normally handled by MatSetValuesBlocked() */
4001   if (mat->insertmode == NOT_SET_VALUES) {
4002     mat->insertmode = addv;
4003   }
4004 #if defined(PETSC_USE_DEBUG)
4005   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4006   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4007 #endif
4008   if (mat->assembled) {
4009     mat->was_assembled = PETSC_TRUE;
4010     mat->assembled     = PETSC_FALSE;
4011   }
4012   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4013 
4014 
4015   if (!barray) {
4016     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
4017     baij->barray = barray;
4018   }
4019 
4020   if (roworiented) {
4021     stepval = (n-1)*bs;
4022   } else {
4023     stepval = (m-1)*bs;
4024   }
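  /* stepval is the number of scalars in v[] that must be skipped between the end of one row of a
     bs x bs block and the start of that block's next row (row-oriented input); for column-oriented
     input it plays the same role for the block's columns */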
4025   for (i=0; i<m; i++) {
4026     if (im[i] < 0) continue;
4027 #if defined(PETSC_USE_DEBUG)
4028     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4029 #endif
4030     if (im[i] >= rstart && im[i] < rend) {
4031       row = im[i] - rstart;
4032       for (j=0; j<n; j++) {
4033         /* if v holds only one block per row (row-oriented) or per column (column-oriented), the block is already contiguous and no copy is required */
4034         if ((roworiented) && (n == 1)) {
4035           barray = (MatScalar*)v + i*bs2;
4036         } else if ((!roworiented) && (m == 1)) {
4037           barray = (MatScalar*)v + j*bs2;
4038         } else { /* Here a copy is required */
4039           if (roworiented) {
4040             value = v + i*(stepval+bs)*bs + j*bs;
4041           } else {
4042             value = v + j*(stepval+bs)*bs + i*bs;
4043           }
4044           for (ii=0; ii<bs; ii++,value+=stepval) {
4045             for (jj=0; jj<bs; jj++) {
4046               *barray++  = *value++;
4047             }
4048           }
4049           barray -= bs2;
4050         }
4051 
4052         if (in[j] >= cstart && in[j] < cend) {
4053           col  = in[j] - cstart;
4054           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4055         }
4056         else if (in[j] < 0) continue;
4057 #if defined(PETSC_USE_DEBUG)
4058         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4059 #endif
4060         else {
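          /* in[j] lies outside this process's diagonal block: map the global block column to a
             local column of the off-diagonal part B via colmap, which is built lazily on first use */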
4061           if (mat->was_assembled) {
4062             if (!baij->colmap) {
4063               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4064             }
4065 
4066 #if defined(PETSC_USE_DEBUG)
4067 #if defined (PETSC_USE_CTABLE)
4068             { PetscInt data;
4069               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4070               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4071             }
4072 #else
4073             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4074 #endif
4075 #endif
4076 #if defined (PETSC_USE_CTABLE)
4077             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4078             col  = (col - 1)/bs;
4079 #else
4080             col = (baij->colmap[in[j]] - 1)/bs;
4081 #endif
4082             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4083               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4084               col =  in[j];
4085             }
4086           }
4087           else col = in[j];
4088           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4089         }
4090       }
4091     } else {
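      /* this block row is owned by another process: stash the values (unless stashing has been
         disabled) so they can be communicated during MatAssemblyBegin/End() */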
4092       if (!baij->donotstash) {
4093         if (roworiented) {
4094           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4095         } else {
4096           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4097         }
4098       }
4099     }
4100   }
4101 
4102   /* task normally handled by MatSetValuesBlocked() */
4103   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4104   PetscFunctionReturn(0);
4105 }
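
/*
   A sketch of the value layout this routine (and MatSetValuesBlocked()) expects, assuming mat is an
   MPIBAIJ matrix with block size 2 and ierr a PetscErrorCode: with the default row-oriented storage,
   v[] holds the bs x bs blocks of a block row stored row by row.  Inserting the single block at
   block row 0, block column 1:

     PetscInt    row = 0, col = 1;                 (block indices, not point indices)
     PetscScalar v[] = {1.0, 2.0,                  (first row of the 2x2 block)
                        3.0, 4.0};                 (second row of the 2x2 block)
     ierr = MatSetValuesBlocked(mat,1,&row,1,&col,v,INSERT_VALUES);CHKERRQ(ierr);
*/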
4106 
4107 #undef __FUNCT__
4108 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4109 /*@
4110      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4111          in standard CSR format.
4112 
4113    Collective on MPI_Comm
4114 
4115    Input Parameters:
4116 +  comm - MPI communicator
4117 .  bs - the block size, only a block size of 1 is supported
4118 .  m - number of local rows (Cannot be PETSC_DECIDE)
4119 .  n - This value should be the same as the local size used in creating the
4120        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4121        calculated if N is given). For square matrices n is almost always m.
4122 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4123 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4124 .   i - row indices
4125 .   j - column indices
4126 -   a - matrix values
4127 
4128    Output Parameter:
4129 .   mat - the matrix
4130 
4131    Level: intermediate
4132 
4133    Notes:
4134        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4135      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4136      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4137 
4138        The i and j indices are 0-based; the entries of i are offsets into the local j (and a) arrays.
4139 
4140 .keywords: matrix, baij, compressed row, sparse, parallel
4141 
4142 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4143           MPIBAIJ, MatCreateMPIBAIJ(), MatCreateMPIAIJWithSplitArrays()
4144 @*/
4145 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4146 {
4147   PetscErrorCode ierr;
4148 
4149 
4150   PetscFunctionBegin;
4151   if (i[0]) {
4152     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4153   }
4154   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4155   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4156   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4157   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4158   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4159   PetscFunctionReturn(0);
4160 }
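
/*
   A minimal usage sketch (values illustrative): each process contributes two local rows in CSR
   form with bs = 1, the only supported block size.  The arrays are copied, so they may be freed
   or reused after the call.

     PetscInt    i[] = {0,1,2};          (row k occupies j[i[k]..i[k+1]-1])
     PetscInt    j[] = {0,1};
     PetscScalar a[] = {2.0,3.0};
     Mat         mat;
     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);CHKERRQ(ierr);
*/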
4161