xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e80fee0d83fca2e79359165ac082b818fca7907f)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.122 1998/05/12 16:16:36 bsmith Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"         /*I "mat.h" I*/
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 
10 extern int MatSetUpMultiply_MPIBAIJ(Mat);
11 extern int DisAssemble_MPIBAIJ(Mat);
12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
14 
15 /*
16      Local utility routine that creates a mapping from the global column
17    number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
19    length of colmap equals the global matrix length.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
23 static int CreateColmap_MPIBAIJ_Private(Mat mat)
24 {
25   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
26   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
27   int         nbs = B->nbs,i,bs=B->bs;;
28 
29   PetscFunctionBegin;
30   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
31   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
32   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
33   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
34   PetscFunctionReturn(0);
35 }
36 
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) - inlined (macro) form
   of setting one entry of the diagonal (A) block.

     row,col - point (not block) indices, local to this process
     value   - scalar to insert (INSERT_VALUES) or accumulate (ADD_VALUES)

   Relies on the caller (MatSetValues_MPIBAIJ) having in scope: a (Mat_SeqBAIJ*),
   A (Mat), aa,ai,aj,ailen,aimax, bs,bs2 and the scratch variables
   rp,ap,bap,rmax,nrow,brow,bcol,ridx,cidx,low,high,t,_i,ii,N.
   If the target block row is full, storage is enlarged by CHUNKSIZE block
   entries (unless a->nonew forbids new nonzeros).
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      /* bisection narrows [low,high) to a few candidates, then linear scan */ \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* block column already allocated: update the entry in place */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: single allocation holding a, j, i arrays */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots, opening a CHUNKSIZE gap after brow */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; /* NOTE(review): nz is bumped only on this realloc path, not for every new block inserted below - verify the nonzero accounting */ \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
113 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) - inlined (macro) form
   of setting one entry of the off-diagonal (B) block.  Identical in structure
   to MatSetValues_SeqBAIJ_A_Private but operates on the b/bi/bj/ba/bilen/bimax
   aliases of baij->B, which the caller must have in scope along with the
   scratch variables rp,ap,bap,rmax,nrow,brow,bcol,ridx,cidx,low,high,t,_i,ii,N.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      /* bisection narrows [low,high) to a few candidates, then linear scan */ \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* block column already allocated: update the entry in place */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: single allocation holding a, j, i arrays */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots, opening a CHUNKSIZE gap after brow */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; /* NOTE(review): nz is bumped only on this realloc path, not for every new block inserted below - verify the nonzero accounting */ \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
188 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - inserts or adds an m x n logically dense block of
   values, addressed by global POINT (unscaled) indices im[]/in[].

   Locally owned rows are routed either to the diagonal block A (column inside
   [cstart_orig,cend_orig)) or the off-diagonal block B, using the inlined
   macros above; the many scratch variables below exist solely for those
   macros.  Rows owned by other processes are stashed for communication during
   assembly unless baij->donotstash is set.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column lies in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column lies in the off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* colmap stores bs*(local block col)+1; -1 undoes the sentinel offset */
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-process column after assembly: fall back to global numbering */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for AssemblyBegin/End */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
	  for ( j=0; j<n; j++ ) {
	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
270 
271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
272 #undef __FUNC__
273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
275 {
276   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
277   Scalar      *value,*barray=baij->barray;
278   int         ierr,i,j,ii,jj,row,col,k,l;
279   int         roworiented = baij->roworiented,rstart=baij->rstart ;
280   int         rend=baij->rend,cstart=baij->cstart,stepval;
281   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
282 
283   if(!barray) {
284     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
285   }
286 
287   if (roworiented) {
288     stepval = (n-1)*bs;
289   } else {
290     stepval = (m-1)*bs;
291   }
292   for ( i=0; i<m; i++ ) {
293 #if defined(USE_PETSC_BOPT_g)
294     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
295     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
296 #endif
297     if (im[i] >= rstart && im[i] < rend) {
298       row = im[i] - rstart;
299       for ( j=0; j<n; j++ ) {
300         /* If NumCol = 1 then a copy is not required */
301         if ((roworiented) && (n == 1)) {
302           barray = v + i*bs2;
303         } else if((!roworiented) && (m == 1)) {
304           barray = v + j*bs2;
305         } else { /* Here a copy is required */
306           if (roworiented) {
307             value = v + i*(stepval+bs)*bs + j*bs;
308           } else {
309             value = v + j*(stepval+bs)*bs + i*bs;
310           }
311           for ( ii=0; ii<bs; ii++,value+=stepval ) {
312             for (jj=0; jj<bs; jj++ ) {
313               *barray++  = *value++;
314             }
315           }
316           barray -=bs2;
317         }
318 
319         if (in[j] >= cstart && in[j] < cend){
320           col  = in[j] - cstart;
321           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
322         }
323 #if defined(USE_PETSC_BOPT_g)
324         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
325         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
326 #endif
327         else {
328           if (mat->was_assembled) {
329             if (!baij->colmap) {
330               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
331             }
332 
333 #if defined(USE_PETSC_BOPT_g)
334             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
335 #endif
336             col = (baij->colmap[in[j]] - 1)/bs;
337             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
338               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
339               col =  in[j];
340             }
341           }
342           else col = in[j];
343           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344         }
345       }
346     } else {
347       if (!baij->donotstash) {
348         if (roworiented ) {
349           row   = im[i]*bs;
350           value = v + i*(stepval+bs)*bs;
351           for ( j=0; j<bs; j++,row++ ) {
352             for ( k=0; k<n; k++ ) {
353               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
354                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
355               }
356             }
357           }
358         } else {
359           for ( j=0; j<n; j++ ) {
360             value = v + j*(stepval+bs)*bs + i*bs;
361             col   = in[j]*bs;
362             for ( k=0; k<bs; k++,col++,value+=stepval) {
363               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
364                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
365               }
366             }
367           }
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 #include <math.h>
/* Multiplicative hashing: HASH_KEY is (an approximation of) the golden-ratio
   conjugate (sqrt(5)-1)/2.  HASH(size,key,tmp) maps key into [0,size); tmp
   must be a double lvalue supplied by the caller and is clobbered.  key is
   evaluated twice - do not pass expressions with side effects. */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - hash-table variant of MatSetValues_MPIBAIJ.
   Requires the table baij->ht/baij->hd to already contain every (block row,
   block col) pair that will be touched; inserting a pair that is not in the
   table is an error (no new nonzeros can be created on this path).
   im[]/in[] are global POINT indices.  Collisions are resolved by linear
   probing forward from the hashed slot, wrapping once at the end.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  /* probe-count statistics kept only in debug builds */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;  /* +1 so key 0 can mean "empty slot" */
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the front */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the front */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        /* HD[idx] points at the bs x bs block; offset by column-major (col,row) within it */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned by another process: stash for AssemblyBegin/End */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
	  for ( j=0; j<n; j++ ) {
	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
458 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - hash-table variant of
   MatSetValuesBlocked_MPIBAIJ.  im[]/in[] are global BLOCK indices; the table
   baij->ht/baij->hd must already contain every (block row, block col) pair
   that will be touched, and HD[idx] points directly at the block's bs2 values.
   Off-process block rows are stashed as point entries.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  /* probe-count statistics kept only in debug builds */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* distance (in Scalars) from the end of one block row/col of v to the next */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;  /* +1 so key 0 can mean "empty slot" */
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the front */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the front */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* scatter the row-oriented source into the column-major block:
             jj strides by bs so column cidx=ii of the source lands in row ii */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          /* column-oriented source copies contiguously, column by column */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned elsewhere: stash the individual point entries */
      if (!baij->donotstash) {
        if (roworiented ) {
          row   = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
588 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - retrieves an m x n block of entries into v (row-major),
   addressed by global POINT indices idxm[]/idxn[].  Only rows owned by this
   process may be requested; requesting a remote row raises PETSC_ERR_SUP.
   Entries whose column falls in no allocated off-diagonal block read as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column lies in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* colmap stores bs*(local block col)+1, 0 meaning absent; cross-check
             against garray so stale sentinel 0 (-> -1 here) also reads as 0.0 */
          if((baij->colmap[idxn[j]/bs]-1 < 0) ||
             (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
            ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
627 
628 #undef __FUNC__
629 #define __FUNC__ "MatNorm_MPIBAIJ"
630 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
631 {
632   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
633   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
634   int        ierr, i,bs2=baij->bs2;
635   double     sum = 0.0;
636   Scalar     *v;
637 
638   PetscFunctionBegin;
639   if (baij->size == 1) {
640     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
641   } else {
642     if (type == NORM_FROBENIUS) {
643       v = amat->a;
644       for (i=0; i<amat->nz*bs2; i++ ) {
645 #if defined(USE_PETSC_COMPLEX)
646         sum += real(conj(*v)*(*v)); v++;
647 #else
648         sum += (*v)*(*v); v++;
649 #endif
650       }
651       v = bmat->a;
652       for (i=0; i<bmat->nz*bs2; i++ ) {
653 #if defined(USE_PETSC_COMPLEX)
654         sum += real(conj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
660       *norm = sqrt(*norm);
661     } else {
662       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
663     }
664   }
665   PetscFunctionReturn(0);
666 }
667 
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
/*
   MatAssemblyBegin_MPIBAIJ - starts communicating the stashed off-process
   entries.  Each stashed value is shipped as a triple of Scalars
   (global row, global column, value); messages are posted here (MPI_Irecv /
   MPI_Isend) and completed in AssemblyEnd via the request arrays saved in
   baij.  The mode argument is not examined here - presumably it matters only
   in AssemblyEnd; confirm against the caller.
*/
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
  int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (baij->donotstash) {
    /* nothing was stashed; record an empty communication pattern */
    baij->svalues    = 0; baij->rvalues    = 0;
    baij->nsends     = 0; baij->nrecvs     = 0;
    baij->send_waits = 0; baij->recv_waits = 0;
    baij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  /* nprocs[j] = #stashed entries for proc j; procs[j] = 1 iff any */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<baij->stash.n; i++ ) {
    idx = baij->stash.idx[i];
    /* rowners holds block-row ownership ranges; scale by bs for point rows */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax      = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack (row,col,value) triples grouped by destination; starts[] is consumed */
  for ( i=0; i<baij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts[] (it was advanced during packing) before sending */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);

  /* save the communication state for MatAssemblyEnd */
  baij->svalues    = svalues;    baij->rvalues    = rvalues;
  baij->nsends     = nsends;     baij->nrecvs     = nreceives;
  baij->send_waits = send_waits; baij->recv_waits = recv_waits;
  baij->rmax       = nmax;

  PetscFunctionReturn(0);
}
772 
773 /*
774   Creates the hash table, and sets the table
775   This table is created only once.
  If new entries need to be added to the matrix
777   then the hash table has to be destroyed and
778   recreated.
779 */
780 #undef __FUNC__
781 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
782 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
783 {
784   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
785   Mat         A = baij->A, B=baij->B;
786   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
787   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
788   int         size,bs2=baij->bs2,rstart=baij->rstart;
789   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
790   int         *HT,key;
791   Scalar      **HD;
792   double      tmp;
793 #if defined(USE_PETSC_BOPT_g)
794   int         ct=0,max=0;
795 #endif
796 
797   PetscFunctionBegin;
798   baij->ht_size=(int)(factor*nz);
799   size = baij->ht_size;
800 
801   if (baij->ht) {
802     PetscFunctionReturn(0);
803   }
804 
805   /* Allocate Memory for Hash Table */
806   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
807   baij->ht = (int*)(baij->hd + size);
808   HD = baij->hd;
809   HT = baij->ht;
810 
811 
812   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
813 
814 
815   /* Loop Over A */
816   for ( i=0; i<a->mbs; i++ ) {
817     for ( j=ai[i]; j<ai[i+1]; j++ ) {
818       row = i+rstart;
819       col = aj[j]+cstart;
820 
821       key = row*Nbs + col + 1;
822       h1  = HASH(size,key,tmp);
823       for ( k=0; k<size; k++ ){
824         if (HT[(h1+k)%size] == 0.0) {
825           HT[(h1+k)%size] = key;
826           HD[(h1+k)%size] = a->a + j*bs2;
827           break;
828 #if defined(USE_PETSC_BOPT_g)
829         } else {
830           ct++;
831 #endif
832         }
833       }
834 #if defined(USE_PETSC_BOPT_g)
835       if (k> max) max = k;
836 #endif
837     }
838   }
839   /* Loop Over B */
840   for ( i=0; i<b->mbs; i++ ) {
841     for ( j=bi[i]; j<bi[i+1]; j++ ) {
842       row = i+rstart;
843       col = garray[bj[j]];
844       key = row*Nbs + col + 1;
845       h1  = HASH(size,key,tmp);
846       for ( k=0; k<size; k++ ){
847         if (HT[(h1+k)%size] == 0.0) {
848           HT[(h1+k)%size] = key;
849           HD[(h1+k)%size] = b->a + j*bs2;
850           break;
851 #if defined(USE_PETSC_BOPT_g)
852         } else {
853           ct++;
854 #endif
855         }
856       }
857 #if defined(USE_PETSC_BOPT_g)
858       if (k> max) max = k;
859 #endif
860     }
861   }
862 
863   /* Print Summary */
864 #if defined(USE_PETSC_BOPT_g)
865   for ( i=0,j=0; i<size; i++)
866     if (HT[i]) {j++;}
867   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
868            (j== 0)? 0.0:((double)(ct+j))/j,max);
869 #endif
870   PetscFunctionReturn(0);
871 }
872 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes matrix assembly: drains the off-processor stash messages posted
   by MatAssemblyBegin, inserts the received (row,col,value) triples into the
   local A/B submatrices, waits for outstanding sends, then assembles A and B.
   Each received value occupies 3 Scalars: global row, global col, value.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
  int         bs=baij->bs,row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space; each message holds up to rmax triples */
    values = baij->rvalues + 3*imdex*baij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;  /* number of (row,col,value) triples actually received */
    for ( i=0; i<n; i++ ) {
      /* indices were shipped as Scalars; convert back and shift row to local numbering */
      row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= baij->cstart*bs && col < baij->cend*bs) {
        /* column lies in the diagonal block A; use local column index */
        col -= baij->cstart*bs;
        ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      } else {
        if (mat->was_assembled) {
          /* B is stored with compacted columns after assembly; map global
             column to B's local numbering via colmap (built lazily) */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = (baij->colmap[col/bs]) - 1 + col%bs;
          if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            /* new off-diagonal block location: expand B back to global
               column numbering and retry with the original global column */
            ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      }
    }
    count--;
  }
  if (baij->recv_waits) PetscFree(baij->recv_waits);
  if (baij->rvalues)    PetscFree(baij->rvalues);

  /* wait on sends */
  if (baij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (baij->send_waits) PetscFree(baij->send_waits);
  if (baij->svalues)    PetscFree(baij->svalues);

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled flags: result is 0 iff some rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the scatter context for MatMult etc. */
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, switch MatSetValues to the hash-table fast path once the
     nonzero pattern is final */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space used by MatGetRow is invalid after reassembly */
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
966 
967 #undef __FUNC__
968 #define __FUNC__ "MatView_MPIBAIJ_Binary"
969 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
970 {
971   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
972   int          ierr;
973 
974   PetscFunctionBegin;
975   if (baij->size == 1) {
976     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
977   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
978   PetscFunctionReturn(0);
979 }
980 
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
/*
   Views an MPIBAIJ matrix to an ASCII, Draw, or Matlab viewer.
   Info formats print per-process statistics; otherwise the matrix entries
   are printed per-process (ASCII_FILE) or gathered onto process 0 and
   viewed there (ASCII_FILES / Draw / Matlab).
*/
static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
  int          ierr, format,rank,bs = baij->bs;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* per-process nonzero/memory statistics, printed one rank at a time */
      MatInfo info;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      PetscSequentialPhaseBegin(mat->comm,1);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscPrintf(mat->comm,"  block size is %d\n",bs);
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    /* nothing to do when graphics are disabled (null draw context) */
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    /* each process writes its own A and B parts in rank order */
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
           baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
            baij->cstart*bs,baij->cend*bs);
    ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = baij->size;
    rank = baij->rank;
    if (size == 1) {
      ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqBAIJ *Aloc;
      int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
      int         mbs=baij->mbs;
      Scalar      *a;

      /* rank 0 owns all M x N rows of the gathered copy; others own none */
      if (!rank) {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqBAIJ*) baij->A->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);

      for ( i=0; i<mbs; i++ ) {
        /* rvals holds the bs consecutive global point-row indices of block-row i */
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          col = (baij->cstart+aj[j])*bs;
          /* blocks are stored column-major: each k inserts one column of the block */
          for (k=0; k<bs; k++ ) {
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      }
      /* copy over the B part */
      Aloc = (Mat_SeqBAIJ*) baij->B->data;
      ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      for ( i=0; i<mbs; i++ ) {
        rvals[0] = bs*(baij->rstart + i);
        for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
        for ( j=ai[i]; j<ai[i+1]; j++ ) {
          /* garray maps B's compacted block columns back to global numbering */
          col = baij->garray[aj[j]]*bs;
          for (k=0; k<bs; k++ ) {
            ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
            col++; a += bs;
          }
        }
      }
      PetscFree(rvals);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
1099 
1100 
1101 
1102 #undef __FUNC__
1103 #define __FUNC__ "MatView_MPIBAIJ"
1104 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1105 {
1106   int         ierr;
1107   ViewerType  vtype;
1108 
1109   PetscFunctionBegin;
1110   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1111   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
1112       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
1113     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
1114   } else if (vtype == BINARY_FILE_VIEWER) {
1115     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1116   } else {
1117     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1118   }
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNC__
1123 #define __FUNC__ "MatDestroy_MPIBAIJ"
1124 int MatDestroy_MPIBAIJ(Mat mat)
1125 {
1126   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1127   int         ierr;
1128 
1129   PetscFunctionBegin;
1130 #if defined(USE_PETSC_LOG)
1131   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
1132 #endif
1133 
1134   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
1135   PetscFree(baij->rowners);
1136   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1137   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1138   if (baij->colmap) PetscFree(baij->colmap);
1139   if (baij->garray) PetscFree(baij->garray);
1140   if (baij->lvec)   VecDestroy(baij->lvec);
1141   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
1142   if (baij->rowvalues) PetscFree(baij->rowvalues);
1143   if (baij->barray) PetscFree(baij->barray);
1144   if (baij->hd) PetscFree(baij->hd);
1145   PetscFree(baij);
1146   PLogObjectDestroy(mat);
1147   PetscHeaderDestroy(mat);
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 #undef __FUNC__
1152 #define __FUNC__ "MatMult_MPIBAIJ"
1153 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1154 {
1155   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1156   int         ierr, nt;
1157 
1158   PetscFunctionBegin;
1159   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1160   if (nt != a->n) {
1161     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1162   }
1163   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1164   if (nt != a->m) {
1165     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1166   }
1167   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1168   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1169   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1170   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1171   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNC__
1176 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1177 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1178 {
1179   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1180   int        ierr;
1181 
1182   PetscFunctionBegin;
1183   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1184   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1185   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1186   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNC__
1191 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1192 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1193 {
1194   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1195   int         ierr;
1196 
1197   PetscFunctionBegin;
1198   /* do nondiagonal part */
1199   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1200   /* send it on its way */
1201   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1202   /* do local part */
1203   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1204   /* receive remote parts: note this assumes the values are not actually */
1205   /* inserted in yy until the next line, which is true for my implementation*/
1206   /* but is not perhaps always true. */
1207   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/* Computes zz = mat^T * xx + yy using a reverse scatter for remote contributions. */
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part: lvec = B^T * xx */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part: zz = A^T * xx + yy */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1231 
1232 /*
1233   This only works correctly for square matrices where the subblock A->A is the
1234    diagonal block
1235 */
1236 #undef __FUNC__
1237 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1238 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1239 {
1240   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1241   int         ierr;
1242 
1243   PetscFunctionBegin;
1244   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1245   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNC__
1250 #define __FUNC__ "MatScale_MPIBAIJ"
1251 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1252 {
1253   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1254   int         ierr;
1255 
1256   PetscFunctionBegin;
1257   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1258   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNC__
1263 #define __FUNC__ "MatGetSize_MPIBAIJ"
1264 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1265 {
1266   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1267 
1268   PetscFunctionBegin;
1269   if (m) *m = mat->M;
1270   if (n) *n = mat->N;
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 #undef __FUNC__
1275 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1276 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1277 {
1278   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1279 
1280   PetscFunctionBegin;
1281   *m = mat->m; *n = mat->n;
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNC__
1286 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1287 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1288 {
1289   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1290 
1291   PetscFunctionBegin;
1292   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1297 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1298 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   Returns one locally owned (point) row of the matrix, merging the A
   (diagonal-block) and B (off-diagonal) parts into a single list sorted by
   increasing global column.  idx and v point into internal work arrays that
   remain valid until MatRestoreRow is called.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time since the work space is shared */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* longest block row counting both A and B parts */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, then the index array behind them */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* request only the pieces (values/columns) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark splits B's entries into those left of the diagonal block
         (global column < cstart) and those right of it */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        /* A's local columns are shifted to global numbering by cstart*bs */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1380 
1381 #undef __FUNC__
1382 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1383 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1384 {
1385   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1386 
1387   PetscFunctionBegin;
1388   if (baij->getrowactive == PETSC_FALSE) {
1389     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1390   }
1391   baij->getrowactive = PETSC_FALSE;
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #undef __FUNC__
1396 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1397 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1398 {
1399   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1400 
1401   PetscFunctionBegin;
1402   *bs = baij->bs;
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNC__
1407 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1408 int MatZeroEntries_MPIBAIJ(Mat A)
1409 {
1410   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1411   int         ierr;
1412 
1413   PetscFunctionBegin;
1414   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1415   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNC__
1420 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1421 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1422 {
1423   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1424   Mat         A = a->A, B = a->B;
1425   int         ierr;
1426   double      isend[5], irecv[5];
1427 
1428   PetscFunctionBegin;
1429   info->block_size     = (double)a->bs;
1430   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1431   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
1432   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1433   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
1434   if (flag == MAT_LOCAL) {
1435     info->nz_used      = isend[0];
1436     info->nz_allocated = isend[1];
1437     info->nz_unneeded  = isend[2];
1438     info->memory       = isend[3];
1439     info->mallocs      = isend[4];
1440   } else if (flag == MAT_GLOBAL_MAX) {
1441     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr);
1442     info->nz_used      = irecv[0];
1443     info->nz_allocated = irecv[1];
1444     info->nz_unneeded  = irecv[2];
1445     info->memory       = irecv[3];
1446     info->mallocs      = irecv[4];
1447   } else if (flag == MAT_GLOBAL_SUM) {
1448     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr);
1449     info->nz_used      = irecv[0];
1450     info->nz_allocated = irecv[1];
1451     info->nz_unneeded  = irecv[2];
1452     info->memory       = irecv[3];
1453     info->mallocs      = irecv[4];
1454   }
1455   info->rows_global       = (double)a->M;
1456   info->columns_global    = (double)a->N;
1457   info->rows_local        = (double)a->m;
1458   info->columns_local     = (double)a->N;
1459   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1460   info->fill_ratio_needed = 0;
1461   info->factor_mallocs    = 0;
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 #undef __FUNC__
1466 #define __FUNC__ "MatSetOption_MPIBAIJ"
1467 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1468 {
1469   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1470 
1471   PetscFunctionBegin;
1472   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1473       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1474       op == MAT_COLUMNS_UNSORTED ||
1475       op == MAT_COLUMNS_SORTED ||
1476       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1477       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1478         MatSetOption(a->A,op);
1479         MatSetOption(a->B,op);
1480   } else if (op == MAT_ROW_ORIENTED) {
1481         a->roworiented = 1;
1482         MatSetOption(a->A,op);
1483         MatSetOption(a->B,op);
1484   } else if (op == MAT_ROWS_SORTED ||
1485              op == MAT_ROWS_UNSORTED ||
1486              op == MAT_SYMMETRIC ||
1487              op == MAT_STRUCTURALLY_SYMMETRIC ||
1488              op == MAT_YES_NEW_DIAGONALS ||
1489              op == MAT_USE_HASH_TABLE)
1490     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1491   else if (op == MAT_COLUMN_ORIENTED) {
1492     a->roworiented = 0;
1493     MatSetOption(a->A,op);
1494     MatSetOption(a->B,op);
1495   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1496     a->donotstash = 1;
1497   } else if (op == MAT_NO_NEW_DIAGONALS) {
1498     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1499   } else if (op == MAT_USE_HASH_TABLE) {
1500     a->ht_flag = 1;
1501   } else {
1502     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1503   }
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNC__
1508 #define __FUNC__ "MatTranspose_MPIBAIJ("
1509 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1510 {
1511   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1512   Mat_SeqBAIJ *Aloc;
1513   Mat        B;
1514   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1515   int        bs=baij->bs,mbs=baij->mbs;
1516   Scalar     *a;
1517 
1518   PetscFunctionBegin;
1519   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1520   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1521   CHKERRQ(ierr);
1522 
1523   /* copy over the A part */
1524   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1525   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1526   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1527 
1528   for ( i=0; i<mbs; i++ ) {
1529     rvals[0] = bs*(baij->rstart + i);
1530     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1531     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1532       col = (baij->cstart+aj[j])*bs;
1533       for (k=0; k<bs; k++ ) {
1534         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1535         col++; a += bs;
1536       }
1537     }
1538   }
1539   /* copy over the B part */
1540   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1541   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542   for ( i=0; i<mbs; i++ ) {
1543     rvals[0] = bs*(baij->rstart + i);
1544     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1545     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1546       col = baij->garray[aj[j]]*bs;
1547       for (k=0; k<bs; k++ ) {
1548         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1549         col++; a += bs;
1550       }
1551     }
1552   }
1553   PetscFree(rvals);
1554   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1555   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1556 
1557   if (matout != PETSC_NULL) {
1558     *matout = B;
1559   } else {
1560     PetscOps       *Abops;
1561     struct _MatOps *Aops;
1562 
1563     /* This isn't really an in-place transpose .... but free data structures from baij */
1564     PetscFree(baij->rowners);
1565     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1566     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1567     if (baij->colmap) PetscFree(baij->colmap);
1568     if (baij->garray) PetscFree(baij->garray);
1569     if (baij->lvec) VecDestroy(baij->lvec);
1570     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1571     PetscFree(baij);
1572 
1573     /*
1574        This is horrible, horrible code. We need to keep the
1575       A pointers for the bops and ops but copy everything
1576       else from C.
1577     */
1578     Abops = A->bops;
1579     Aops  = A->ops;
1580     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1581     A->bops = Abops;
1582     A->ops  = Aops;
1583 
1584     PetscHeaderDestroy(B);
1585   }
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNC__
1590 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1591 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1592 {
1593   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1594   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1595   int ierr,s1,s2,s3;
1596 
1597   PetscFunctionBegin;
1598   if (ll)  {
1599     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1600     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1601     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1602     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1603     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1604   }
1605   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1606   PetscFunctionReturn(0);
1607 }
1608 
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
    Zeros the (global, scalar-indexed) rows of A listed in is; if diag is
  non-null, the diagonal entry of each zeroed row is set to *diag.  Rows
  may be listed by any processor: the owning processors are determined
  from the block-row ownership table and the indices are shipped to them
  with point-to-point messages before the local zeroing is done.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     localdiag;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = number of rows destined for proc j; procs[j] = 0/1 flag */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] holds block-row boundaries; scale by bs to compare
       against the scalar row index idx */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  /* SUM of the 0/1 flags gives each proc its receive count; MAX of the
     per-destination counts bounds the length of any one message */
  work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives:   */
  /* one fixed-size slot of nmax ints per expected message */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination; starts[] is consumed here */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] (it was advanced above) before posting the sends */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  /* first locally-owned scalar row; received global indices are
     converted to local indices by subtracting this */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  /* gather all received global indices into lrows[] as local indices */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  localdiag = PETSC_FALSE;
  if (diag && (l->A->M == l->A->N)) {
    localdiag = PETSC_TRUE;
    ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* non-square diagonal block: fall back to inserting the diagonal
     entries one at a time, then reassemble */
  if (diag && (localdiag == PETSC_FALSE)) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  }
  PetscFree(lrows);

  /* wait on sends */
  /* svalues must stay alive until all Isends complete */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
1755 
1756 extern int MatPrintHelp_SeqBAIJ(Mat);
1757 #undef __FUNC__
1758 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1759 int MatPrintHelp_MPIBAIJ(Mat A)
1760 {
1761   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1762   MPI_Comm    comm = A->comm;
1763   static int  called = 0;
1764   int         ierr;
1765 
1766   PetscFunctionBegin;
1767   if (!a->rank) {
1768     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1769   }
1770   if (called) {PetscFunctionReturn(0);} else called = 1;
1771   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1772   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 #undef __FUNC__
1777 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1778 int MatSetUnfactored_MPIBAIJ(Mat A)
1779 {
1780   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1781   int         ierr;
1782 
1783   PetscFunctionBegin;
1784   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1785   PetscFunctionReturn(0);
1786 }
1787 
static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
/*
    Function table for the MATMPIBAIJ type.  The initializer is purely
  positional: each slot corresponds to one operation in the declaration
  of struct _MatOps (declared elsewhere -- not visible in this file), and
  a 0 marks an operation that is not implemented for this format.
  NOTE(review): consult the struct _MatOps declaration before inserting,
  removing, or reordering any entry here.
*/
static struct _MatOps MatOps = {
  MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
  0,0,0,0,
  0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
  0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
  0,0,0,MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
  0,0,MatConvertSameType_MPIBAIJ,0,0,
  0,0,0,MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
  0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
1806 
1807 
#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local matrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* uniprocessor case: create a plain sequential BAIJ matrix.  Note that
     o_nz/o_nnz and the -mat_use_hash_table option below are not used on
     this path (there is no off-diagonal part) */
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* validate the user's mixture of local/global size specifications */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local sizes win and
     the global sizes are recomputed by summation below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* global sizes from the sum of the local sizes */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* local sizes by near-even split of the global block count */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships */
  /* rowners/cowners share one allocation; entry i is the first block
     row/column owned by processor i (prefix sums of the Allgather) */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* the diagonal block is m x n; the off-diagonal block is m x N
     (all non-local columns, compacted later by MatSetUpMultiply) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  PetscFunctionReturn(0);
}
2046 
2047 #undef __FUNC__
2048 #define __FUNC__ "MatConvertSameType_MPIBAIJ"
2049 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
2050 {
2051   Mat         mat;
2052   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2053   int         ierr, len=0, flg;
2054 
2055   PetscFunctionBegin;
2056   *newmat       = 0;
2057   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
2058   PLogObjectCreate(mat);
2059   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2060   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
2061   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2062   mat->ops->view       = MatView_MPIBAIJ;
2063   mat->factor     = matin->factor;
2064   mat->assembled  = PETSC_TRUE;
2065 
2066   a->m = mat->m   = oldmat->m;
2067   a->n = mat->n   = oldmat->n;
2068   a->M = mat->M   = oldmat->M;
2069   a->N = mat->N   = oldmat->N;
2070 
2071   a->bs  = oldmat->bs;
2072   a->bs2 = oldmat->bs2;
2073   a->mbs = oldmat->mbs;
2074   a->nbs = oldmat->nbs;
2075   a->Mbs = oldmat->Mbs;
2076   a->Nbs = oldmat->Nbs;
2077 
2078   a->rstart       = oldmat->rstart;
2079   a->rend         = oldmat->rend;
2080   a->cstart       = oldmat->cstart;
2081   a->cend         = oldmat->cend;
2082   a->size         = oldmat->size;
2083   a->rank         = oldmat->rank;
2084   mat->insertmode = NOT_SET_VALUES;
2085   a->rowvalues    = 0;
2086   a->getrowactive = PETSC_FALSE;
2087   a->barray       = 0;
2088 
2089   /* hash table stuff */
2090   a->ht           = 0;
2091   a->hd           = 0;
2092   a->ht_size      = 0;
2093   a->ht_flag      = oldmat->ht_flag;
2094   a->ht_fact      = oldmat->ht_fact;
2095   a->ht_total_ct  = 0;
2096   a->ht_insert_ct = 0;
2097 
2098 
2099   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2100   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2101   a->cowners = a->rowners + a->size + 2;
2102   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2103   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
2104   if (oldmat->colmap) {
2105     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2106     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2107     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2108   } else a->colmap = 0;
2109   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2110     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2111     PLogObjectMemory(mat,len*sizeof(int));
2112     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2113   } else a->garray = 0;
2114 
2115   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2116   PLogObjectParent(mat,a->lvec);
2117   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2118   PLogObjectParent(mat,a->Mvctx);
2119   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
2120   PLogObjectParent(mat,a->A);
2121   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
2122   PLogObjectParent(mat,a->B);
2123   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2124   if (flg) {
2125     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2126   }
2127   *newmat = mat;
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #include "sys.h"
2132 
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
/*
    Loads an MPIBAIJ matrix from a PETSc binary viewer.  Processor 0 reads
  the header, row lengths, column indices and values from the file and
  ships each processor its portion with point-to-point messages.  If the
  global size is not divisible by the block size (set with
  -matload_block_size), extra rows/columns are appended with a unit
  diagonal to pad the matrix to a whole number of blocks.

    NOTE(review): the "type" argument is not consulted in this routine.
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  /* only processor 0 touches the file; header is broadcast below */
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  /* rowners[i] = first block row of proc i; browners[] the same in
     scalar rows (times bs); both live in the rowners allocation */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* padding rows get length 1 (their unit diagonal entry) */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* the padding entries are not on disk; synthesize them here */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  /* mask[] flags block columns already seen within the current block
     row; masked1/masked2 record which flags to reset afterwards */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
         CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    /* padding rows get a unit diagonal value */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    /* note: relies on i == size-1 after the loop above */
    if ( size != 1 ){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2360 
2361 
2362 
2363 #undef __FUNC__
2364 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2365 /*@
2366    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2367 
2368    Input Parameters:
2369 .  mat  - the matrix
2370 .  fact - factor
2371 
2372    Collective on Mat
2373 
2374   Notes:
2375    This can also be set by the command line option: -mat_use_hash_table fact
2376 
2377 .keywords: matrix, hashtable, factor, HT
2378 
2379 .seealso: MatSetOption()
2380 @*/
2381 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2382 {
2383   Mat_MPIBAIJ *baij;
2384 
2385   PetscFunctionBegin;
2386   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2387   if (mat->type != MATMPIBAIJ) {
2388       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2389   }
2390   baij = (Mat_MPIBAIJ*) mat->data;
2391   baij->ht_fact = fact;
2392   PetscFunctionReturn(0);
2393 }
2394