xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 488ecbaffa467bb032d31d7eb20bc6d0ef6d986c)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.133 1998/07/14 03:08:02 bsmith Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"         /*I "mat.h" I*/
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 
10 extern int MatSetUpMultiply_MPIBAIJ(Mat);
11 extern int DisAssemble_MPIBAIJ(Mat);
12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
14 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
15 
16 /*
17      Local utility routine that creates a mapping from the global column
18    number to the local number in the off-diagonal part of the local
19    storage of the matrix.  This is done in a non scable way since the
20    length of colmap equals the global matrix length.
21 */
22 #undef __FUNC__
23 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
24 static int CreateColmap_MPIBAIJ_Private(Mat mat)
25 {
26   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
28   int         nbs = B->nbs,i,bs=B->bs;;
29 
30   PetscFunctionBegin;
31   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
32   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
33   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
34   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
35   PetscFunctionReturn(0);
36 }
37 
38 #define CHUNKSIZE  10
39 
40 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
41 { \
42  \
43     brow = row/bs;  \
44     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
45     rmax = aimax[brow]; nrow = ailen[brow]; \
46       bcol = col/bs; \
47       ridx = row % bs; cidx = col % bs; \
48       low = 0; high = nrow; \
49       while (high-low > 3) { \
50         t = (low+high)/2; \
51         if (rp[t] > bcol) high = t; \
52         else              low  = t; \
53       } \
54       for ( _i=low; _i<high; _i++ ) { \
55         if (rp[_i] > bcol) break; \
56         if (rp[_i] == bcol) { \
57           bap  = ap +  bs2*_i + bs*cidx + ridx; \
58           if (addv == ADD_VALUES) *bap += value;  \
59           else                    *bap  = value;  \
60           goto a_noinsert; \
61         } \
62       } \
63       if (a->nonew == 1) goto a_noinsert; \
64       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
65       if (nrow >= rmax) { \
66         /* there is no extra room in row, therefore enlarge */ \
67         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
68         Scalar *new_a; \
69  \
70         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
71  \
72         /* malloc new storage space */ \
73         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
74         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
75         new_j   = (int *) (new_a + bs2*new_nz); \
76         new_i   = new_j + new_nz; \
77  \
78         /* copy over old data into new slots */ \
79         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
80         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
81         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
82         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
83         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
84                                                            len*sizeof(int)); \
85         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
86         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
87         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
88                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
89         /* free up old matrix storage */ \
90         PetscFree(a->a);  \
91         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
92         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
93         a->singlemalloc = 1; \
94  \
95         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
96         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
97         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
98         a->maxnz += bs2*CHUNKSIZE; \
99         a->reallocs++; \
100         a->nz++; \
101       } \
102       N = nrow++ - 1;  \
103       /* shift up all the later entries in this row */ \
104       for ( ii=N; ii>=_i; ii-- ) { \
105         rp[ii+1] = rp[ii]; \
106         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
107       } \
108       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
109       rp[_i]                      = bcol;  \
110       ap[bs2*_i + bs*cidx + ridx] = value;  \
111       a_noinsert:; \
112     ailen[brow] = nrow; \
113 }
114 
115 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
116 { \
117  \
118     brow = row/bs;  \
119     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
120     rmax = bimax[brow]; nrow = bilen[brow]; \
121       bcol = col/bs; \
122       ridx = row % bs; cidx = col % bs; \
123       low = 0; high = nrow; \
124       while (high-low > 3) { \
125         t = (low+high)/2; \
126         if (rp[t] > bcol) high = t; \
127         else              low  = t; \
128       } \
129       for ( _i=low; _i<high; _i++ ) { \
130         if (rp[_i] > bcol) break; \
131         if (rp[_i] == bcol) { \
132           bap  = ap +  bs2*_i + bs*cidx + ridx; \
133           if (addv == ADD_VALUES) *bap += value;  \
134           else                    *bap  = value;  \
135           goto b_noinsert; \
136         } \
137       } \
138       if (b->nonew == 1) goto b_noinsert; \
139       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
140       if (nrow >= rmax) { \
141         /* there is no extra room in row, therefore enlarge */ \
142         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
143         Scalar *new_a; \
144  \
145         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
146  \
147         /* malloc new storage space */ \
148         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
149         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
150         new_j   = (int *) (new_a + bs2*new_nz); \
151         new_i   = new_j + new_nz; \
152  \
153         /* copy over old data into new slots */ \
154         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
155         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
156         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
157         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
158         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
159                                                            len*sizeof(int)); \
160         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
161         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
162         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
163                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
164         /* free up old matrix storage */ \
165         PetscFree(b->a);  \
166         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
167         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
168         b->singlemalloc = 1; \
169  \
170         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
171         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
172         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
173         b->maxnz += bs2*CHUNKSIZE; \
174         b->reallocs++; \
175         b->nz++; \
176       } \
177       N = nrow++ - 1;  \
178       /* shift up all the later entries in this row */ \
179       for ( ii=N; ii>=_i; ii-- ) { \
180         rp[ii+1] = rp[ii]; \
181         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
182       } \
183       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
184       rp[_i]                      = bcol;  \
185       ap[bs2*_i + bs*cidx + ridx] = value;  \
186       b_noinsert:; \
187     bilen[brow] = nrow; \
188 }
189 
190 #undef __FUNC__
191 #define __FUNC__ "MatSetValues_MPIBAIJ"
192 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
193 {
194   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
195   Scalar      value;
196   int         ierr,i,j,row,col;
197   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
198   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
199   int         cend_orig=baij->cend_bs,bs=baij->bs;
200 
201   /* Some Variables required in the macro */
202   Mat         A = baij->A;
203   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
204   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
205   Scalar      *aa=a->a;
206 
207   Mat         B = baij->B;
208   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
209   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
210   Scalar      *ba=b->a;
211 
212   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
213   int         low,high,t,ridx,cidx,bs2=a->bs2;
214   Scalar      *ap,*bap;
215 
216   PetscFunctionBegin;
217   for ( i=0; i<m; i++ ) {
218 #if defined(USE_PETSC_BOPT_g)
219     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
220     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
221 #endif
222     if (im[i] >= rstart_orig && im[i] < rend_orig) {
223       row = im[i] - rstart_orig;
224       for ( j=0; j<n; j++ ) {
225         if (in[j] >= cstart_orig && in[j] < cend_orig){
226           col = in[j] - cstart_orig;
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231 #if defined(USE_PETSC_BOPT_g)
232         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
233         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
234 #endif
235         else {
236           if (mat->was_assembled) {
237             if (!baij->colmap) {
238               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
239             }
240             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
241             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
242               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
243               col =  in[j];
244               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
245               B = baij->B;
246               b = (Mat_SeqBAIJ *) (B)->data;
247               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
248               ba=b->a;
249             }
250           } else col = in[j];
251           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
252           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (roworiented && !baij->donotstash) {
258         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
259       } else {
260         if (!baij->donotstash) {
261           row = im[i];
262 	  for ( j=0; j<n; j++ ) {
263 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
264           }
265         }
266       }
267     }
268   }
269   PetscFunctionReturn(0);
270 }
271 
272 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
273 #undef __FUNC__
274 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
275 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
276 {
277   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
278   Scalar      *value,*barray=baij->barray;
279   int         ierr,i,j,ii,jj,row,col,k,l;
280   int         roworiented = baij->roworiented,rstart=baij->rstart ;
281   int         rend=baij->rend,cstart=baij->cstart,stepval;
282   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
283 
284   if(!barray) {
285     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
286   }
287 
288   if (roworiented) {
289     stepval = (n-1)*bs;
290   } else {
291     stepval = (m-1)*bs;
292   }
293   for ( i=0; i<m; i++ ) {
294 #if defined(USE_PETSC_BOPT_g)
295     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
296     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
297 #endif
298     if (im[i] >= rstart && im[i] < rend) {
299       row = im[i] - rstart;
300       for ( j=0; j<n; j++ ) {
301         /* If NumCol = 1 then a copy is not required */
302         if ((roworiented) && (n == 1)) {
303           barray = v + i*bs2;
304         } else if((!roworiented) && (m == 1)) {
305           barray = v + j*bs2;
306         } else { /* Here a copy is required */
307           if (roworiented) {
308             value = v + i*(stepval+bs)*bs + j*bs;
309           } else {
310             value = v + j*(stepval+bs)*bs + i*bs;
311           }
312           for ( ii=0; ii<bs; ii++,value+=stepval ) {
313             for (jj=0; jj<bs; jj++ ) {
314               *barray++  = *value++;
315             }
316           }
317           barray -=bs2;
318         }
319 
320         if (in[j] >= cstart && in[j] < cend){
321           col  = in[j] - cstart;
322           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
323         }
324 #if defined(USE_PETSC_BOPT_g)
325         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
326         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
327 #endif
328         else {
329           if (mat->was_assembled) {
330             if (!baij->colmap) {
331               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
332             }
333 
334 #if defined(USE_PETSC_BOPT_g)
335             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
336 #endif
337             col = (baij->colmap[in[j]] - 1)/bs;
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
340               col =  in[j];
341             }
342           }
343           else col = in[j];
344           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
345         }
346       }
347     } else {
348       if (!baij->donotstash) {
349         if (roworiented ) {
350           row   = im[i]*bs;
351           value = v + i*(stepval+bs)*bs;
352           for ( j=0; j<bs; j++,row++ ) {
353             for ( k=0; k<n; k++ ) {
354               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
355                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
356               }
357             }
358           }
359         } else {
360           for ( j=0; j<n; j++ ) {
361             value = v + j*(stepval+bs)*bs + i*bs;
362             col   = in[j]*bs;
363             for ( k=0; k<bs; k++,col++,value+=stepval) {
364               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
365                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
366               }
367             }
368           }
369         }
370       }
371     }
372   }
373   PetscFunctionReturn(0);
374 }
375 #include <math.h>
376 #define HASH_KEY 0.6180339887
377 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
379 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
380 #undef __FUNC__
381 #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
382 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
383 {
384   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
385   int         ierr,i,j,row,col;
386   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
387   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
388   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
389   double      tmp;
390   Scalar      ** HD = baij->hd,value;
391 #if defined(USE_PETSC_BOPT_g)
392   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
393 #endif
394 
395   PetscFunctionBegin;
396 
397   for ( i=0; i<m; i++ ) {
398 #if defined(USE_PETSC_BOPT_g)
399     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
400     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
401 #endif
402       row = im[i];
403     if (row >= rstart_orig && row < rend_orig) {
404       for ( j=0; j<n; j++ ) {
405         col = in[j];
406         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
407         /* Look up into the Hash Table */
408         key = (row/bs)*Nbs+(col/bs)+1;
409         h1  = HASH(size,key,tmp);
410 
411 
412         idx = h1;
413 #if defined(USE_PETSC_BOPT_g)
414         insert_ct++;
415         total_ct++;
416         if (HT[idx] != key) {
417           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
418           if (idx == size) {
419             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
420             if (idx == h1) {
421               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
422             }
423           }
424         }
425 #else
426         if (HT[idx] != key) {
427           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
428           if (idx == size) {
429             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
430             if (idx == h1) {
431               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
432             }
433           }
434         }
435 #endif
436         /* A HASH table entry is found, so insert the values at the correct address */
437         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
438         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
439       }
440     } else {
441       if (roworiented && !baij->donotstash) {
442         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
443       } else {
444         if (!baij->donotstash) {
445           row = im[i];
446 	  for ( j=0; j<n; j++ ) {
447 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
448           }
449         }
450       }
451     }
452   }
453 #if defined(USE_PETSC_BOPT_g)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNC__
461 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
463 {
464   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
465   int         ierr,i,j,ii,jj,row,col,k,l;
466   int         roworiented = baij->roworiented,rstart=baij->rstart ;
467   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
468   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
469   double      tmp;
470   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
471 #if defined(USE_PETSC_BOPT_g)
472   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
473 #endif
474 
475   PetscFunctionBegin;
476 
477   if (roworiented) {
478     stepval = (n-1)*bs;
479   } else {
480     stepval = (m-1)*bs;
481   }
482   for ( i=0; i<m; i++ ) {
483 #if defined(USE_PETSC_BOPT_g)
484     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
485     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
486 #endif
487     row   = im[i];
488     v_t   = v + i*bs2;
489     if (row >= rstart && row < rend) {
490       for ( j=0; j<n; j++ ) {
491         col = in[j];
492 
493         /* Look up into the Hash Table */
494         key = row*Nbs+col+1;
495         h1  = HASH(size,key,tmp);
496 
497         idx = h1;
498 #if defined(USE_PETSC_BOPT_g)
499         total_ct++;
500         insert_ct++;
501        if (HT[idx] != key) {
502           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
503           if (idx == size) {
504             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
505             if (idx == h1) {
506               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
507             }
508           }
509         }
510 #else
511         if (HT[idx] != key) {
512           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
513           if (idx == size) {
514             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
515             if (idx == h1) {
516               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
517             }
518           }
519         }
520 #endif
521         baij_a = HD[idx];
522         if (roworiented) {
523           /*value = v + i*(stepval+bs)*bs + j*bs;*/
524           /* value = v + (i*(stepval+bs)+j)*bs; */
525           value = v_t;
526           v_t  += bs;
527           if (addv == ADD_VALUES) {
528             for ( ii=0; ii<bs; ii++,value+=stepval) {
529               for ( jj=ii; jj<bs2; jj+=bs ) {
530                 baij_a[jj]  += *value++;
531               }
532             }
533           } else {
534             for ( ii=0; ii<bs; ii++,value+=stepval) {
535               for ( jj=ii; jj<bs2; jj+=bs ) {
536                 baij_a[jj]  = *value++;
537               }
538             }
539           }
540         } else {
541           value = v + j*(stepval+bs)*bs + i*bs;
542           if (addv == ADD_VALUES) {
543             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
544               for ( jj=0; jj<bs; jj++ ) {
545                 baij_a[jj]  += *value++;
546               }
547             }
548           } else {
549             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
550               for ( jj=0; jj<bs; jj++ ) {
551                 baij_a[jj]  = *value++;
552               }
553             }
554           }
555         }
556       }
557     } else {
558       if (!baij->donotstash) {
559         if (roworiented ) {
560           row   = im[i]*bs;
561           value = v + i*(stepval+bs)*bs;
562           for ( j=0; j<bs; j++,row++ ) {
563             for ( k=0; k<n; k++ ) {
564               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
565                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
566               }
567             }
568           }
569         } else {
570           for ( j=0; j<n; j++ ) {
571             value = v + j*(stepval+bs)*bs + i*bs;
572             col   = in[j]*bs;
573             for ( k=0; k<bs; k++,col++,value+=stepval) {
574               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
575                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
576               }
577             }
578           }
579         }
580       }
581     }
582   }
583 #if defined(USE_PETSC_BOPT_g)
584   baij->ht_total_ct = total_ct;
585   baij->ht_insert_ct = insert_ct;
586 #endif
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNC__
591 #define __FUNC__ "MatGetValues_MPIBAIJ"
592 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
593 {
594   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
595   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
596   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
597 
598   PetscFunctionBegin;
599   for ( i=0; i<m; i++ ) {
600     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
601     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
602     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
603       row = idxm[i] - bsrstart;
604       for ( j=0; j<n; j++ ) {
605         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
606         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
607         if (idxn[j] >= bscstart && idxn[j] < bscend){
608           col = idxn[j] - bscstart;
609           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
610         } else {
611           if (!baij->colmap) {
612             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
613           }
614           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
615              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
616           else {
617             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
618             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
619           }
620         }
621       }
622     } else {
623       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
624     }
625   }
626  PetscFunctionReturn(0);
627 }
628 
629 #undef __FUNC__
630 #define __FUNC__ "MatNorm_MPIBAIJ"
631 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
632 {
633   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
634   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
635   int        ierr, i,bs2=baij->bs2;
636   double     sum = 0.0;
637   Scalar     *v;
638 
639   PetscFunctionBegin;
640   if (baij->size == 1) {
641     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
642   } else {
643     if (type == NORM_FROBENIUS) {
644       v = amat->a;
645       for (i=0; i<amat->nz*bs2; i++ ) {
646 #if defined(USE_PETSC_COMPLEX)
647         sum += PetscReal(PetscConj(*v)*(*v)); v++;
648 #else
649         sum += (*v)*(*v); v++;
650 #endif
651       }
652       v = bmat->a;
653       for (i=0; i<bmat->nz*bs2; i++ ) {
654 #if defined(USE_PETSC_COMPLEX)
655         sum += PetscReal(PetscConj(*v)*(*v)); v++;
656 #else
657         sum += (*v)*(*v); v++;
658 #endif
659       }
660       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
661       *norm = sqrt(*norm);
662     } else {
663       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
664     }
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 #undef __FUNC__
670 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
671 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
672 {
673   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
674   MPI_Comm    comm = mat->comm;
675   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
676   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
677   MPI_Request *send_waits,*recv_waits;
678   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
679   InsertMode  addv;
680   Scalar      *rvalues,*svalues;
681 
682   PetscFunctionBegin;
683   if (baij->donotstash) {
684     baij->svalues    = 0; baij->rvalues    = 0;
685     baij->nsends     = 0; baij->nrecvs     = 0;
686     baij->send_waits = 0; baij->recv_waits = 0;
687     baij->rmax       = 0;
688     PetscFunctionReturn(0);
689   }
690 
691   /* make sure all processors are either in INSERTMODE or ADDMODE */
692   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
693   if (addv == (ADD_VALUES|INSERT_VALUES)) {
694     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
695   }
696   mat->insertmode = addv; /* in case this processor had no cache */
697 
698   /*  first count number of contributors to each processor */
699   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
700   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
701   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
702   for ( i=0; i<baij->stash.n; i++ ) {
703     idx = baij->stash.idx[i];
704     for ( j=0; j<size; j++ ) {
705       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
706         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
707       }
708     }
709   }
710   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
711 
712   /* inform other processors of number of messages and max length*/
713   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
714   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
715   nreceives = work[rank];
716   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
717   nmax      = work[rank];
718   PetscFree(work);
719 
720   /* post receives:
721        1) each message will consist of ordered pairs
722      (global index,value) we store the global index as a double
723      to simplify the message passing.
724        2) since we don't know how long each individual message is we
725      allocate the largest needed buffer for each receive. Potentially
726      this is a lot of wasted space.
727 
728 
729        This could be done better.
730   */
731   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
732   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
733   for ( i=0; i<nreceives; i++ ) {
734     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
735   }
736 
737   /* do sends:
738       1) starts[i] gives the starting index in svalues for stuff going to
739          the ith processor
740   */
741   svalues    = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
742   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
743   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
744   starts[0] = 0;
745   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
746   for ( i=0; i<baij->stash.n; i++ ) {
747     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
748     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
749     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
750   }
751   PetscFree(owner);
752   starts[0] = 0;
753   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
754   count = 0;
755   for ( i=0; i<size; i++ ) {
756     if (procs[i]) {
757       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
758     }
759   }
760   PetscFree(starts); PetscFree(nprocs);
761 
762   /* Free cache space */
763   PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
764   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
765 
766   baij->svalues    = svalues;    baij->rvalues    = rvalues;
767   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
768   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
769   baij->rmax       = nmax;
770 
771   PetscFunctionReturn(0);
772 }
773 
774 /*
775   Creates the hash table, and sets the table
776   This table is created only once.
777   If new entried need to be added to the matrix
778   then the hash table has to be destroyed and
779   recreated.
780 */
781 #undef __FUNC__
782 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
783 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
784 {
785   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
786   Mat         A = baij->A, B=baij->B;
787   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
788   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
789   int         size,bs2=baij->bs2,rstart=baij->rstart;
790   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
791   int         *HT,key;
792   Scalar      **HD;
793   double      tmp;
794 #if defined(USE_PETSC_BOPT_g)
795   int         ct=0,max=0;
796 #endif
797 
798   PetscFunctionBegin;
799   baij->ht_size=(int)(factor*nz);
800   size = baij->ht_size;
801 
802   if (baij->ht) {
803     PetscFunctionReturn(0);
804   }
805 
806   /* Allocate Memory for Hash Table */
807   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
808   baij->ht = (int*)(baij->hd + size);
809   HD = baij->hd;
810   HT = baij->ht;
811 
812 
813   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
814 
815 
816   /* Loop Over A */
817   for ( i=0; i<a->mbs; i++ ) {
818     for ( j=ai[i]; j<ai[i+1]; j++ ) {
819       row = i+rstart;
820       col = aj[j]+cstart;
821 
822       key = row*Nbs + col + 1;
823       h1  = HASH(size,key,tmp);
824       for ( k=0; k<size; k++ ){
825         if (HT[(h1+k)%size] == 0.0) {
826           HT[(h1+k)%size] = key;
827           HD[(h1+k)%size] = a->a + j*bs2;
828           break;
829 #if defined(USE_PETSC_BOPT_g)
830         } else {
831           ct++;
832 #endif
833         }
834       }
835 #if defined(USE_PETSC_BOPT_g)
836       if (k> max) max = k;
837 #endif
838     }
839   }
840   /* Loop Over B */
841   for ( i=0; i<b->mbs; i++ ) {
842     for ( j=bi[i]; j<bi[i+1]; j++ ) {
843       row = i+rstart;
844       col = garray[bj[j]];
845       key = row*Nbs + col + 1;
846       h1  = HASH(size,key,tmp);
847       for ( k=0; k<size; k++ ){
848         if (HT[(h1+k)%size] == 0.0) {
849           HT[(h1+k)%size] = key;
850           HD[(h1+k)%size] = b->a + j*bs2;
851           break;
852 #if defined(USE_PETSC_BOPT_g)
853         } else {
854           ct++;
855 #endif
856         }
857       }
858 #if defined(USE_PETSC_BOPT_g)
859       if (k> max) max = k;
860 #endif
861     }
862   }
863 
864   /* Print Summary */
865 #if defined(USE_PETSC_BOPT_g)
866   for ( i=0,j=0; i<size; i++)
867     if (HT[i]) {j++;}
868   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
869            (j== 0)? 0.0:((double)(ct+j))/j,max);
870 #endif
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNC__
875 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
876 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
877 {
878   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
879   MPI_Status  *send_status,recv_status;
880   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
881   int         bs=baij->bs,row,col,other_disassembled;
882   Scalar      *values,val;
883   InsertMode  addv = mat->insertmode;
884 
885   PetscFunctionBegin;
886   /*  wait on receives */
887   while (count) {
888     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
889     /* unpack receives into our local space */
890     values = baij->rvalues + 3*imdex*baij->rmax;
891     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
892     n = n/3;
893     for ( i=0; i<n; i++ ) {
894       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
895       col = (int) PetscReal(values[3*i+1]);
896       val = values[3*i+2];
897       if (col >= baij->cstart*bs && col < baij->cend*bs) {
898         col -= baij->cstart*bs;
899         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
900       } else {
901         if (mat->was_assembled) {
902           if (!baij->colmap) {
903             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
904           }
905           col = (baij->colmap[col/bs]) - 1 + col%bs;
906           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
907             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
908             col = (int) PetscReal(values[3*i+1]);
909           }
910         }
911         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
912       }
913     }
914     count--;
915   }
916   if (baij->recv_waits) PetscFree(baij->recv_waits);
917   if (baij->rvalues)    PetscFree(baij->rvalues);
918 
919   /* wait on sends */
920   if (baij->nsends) {
921     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
922     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
923     PetscFree(send_status);
924   }
925   if (baij->send_waits) PetscFree(baij->send_waits);
926   if (baij->svalues)    PetscFree(baij->svalues);
927 
928   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
929   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
930 
931   /* determine if any processor has disassembled, if so we must
932      also disassemble ourselfs, in order that we may reassemble. */
933   /*
934      if nonzero structure of submatrix B cannot change then we know that
935      no processor disassembled thus we can skip this stuff
936   */
937   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
938     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
939     if (mat->was_assembled && !other_disassembled) {
940       ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
941     }
942   }
943 
944   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
945     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
946   }
947   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
948   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
949 
950 #if defined(USE_PETSC_BOPT_g)
951   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
952     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
953              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
954     baij->ht_total_ct  = 0;
955     baij->ht_insert_ct = 0;
956   }
957 #endif
958   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
959     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
960     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
961     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
962   }
963 
964   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNC__
969 #define __FUNC__ "MatView_MPIBAIJ_Binary"
970 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
971 {
972   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
973   int          ierr;
974 
975   PetscFunctionBegin;
976   if (baij->size == 1) {
977     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
978   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNC__
983 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
984 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
985 {
986   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
987   int          ierr, format,rank,bs = baij->bs;
988   FILE         *fd;
989   ViewerType   vtype;
990 
991   PetscFunctionBegin;
992   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
993   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
994     ierr = ViewerGetFormat(viewer,&format);
995     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
996       MatInfo info;
997       MPI_Comm_rank(mat->comm,&rank);
998       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
999       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
1000       PetscSequentialPhaseBegin(mat->comm,1);
1001       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1002               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1003               baij->bs,(int)info.memory);
1004       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
1005       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1006       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
1007       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1008       fflush(fd);
1009       PetscSequentialPhaseEnd(mat->comm,1);
1010       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
1011       PetscFunctionReturn(0);
1012     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1013       PetscPrintf(mat->comm,"  block size is %d\n",bs);
1014       PetscFunctionReturn(0);
1015     }
1016   }
1017 
1018   if (vtype == DRAW_VIEWER) {
1019     Draw       draw;
1020     PetscTruth isnull;
1021     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
1022     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1023   }
1024 
1025   if (vtype == ASCII_FILE_VIEWER) {
1026     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1027     PetscSequentialPhaseBegin(mat->comm,1);
1028     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
1029            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
1030             baij->cstart*bs,baij->cend*bs);
1031     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1032     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
1033     fflush(fd);
1034     PetscSequentialPhaseEnd(mat->comm,1);
1035   } else {
1036     int size = baij->size;
1037     rank = baij->rank;
1038     if (size == 1) {
1039       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1040     } else {
1041       /* assemble the entire matrix onto first processor. */
1042       Mat         A;
1043       Mat_SeqBAIJ *Aloc;
1044       int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
1045       int         mbs=baij->mbs;
1046       Scalar      *a;
1047 
1048       if (!rank) {
1049         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1050       } else {
1051         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1052       }
1053       PLogObjectParent(mat,A);
1054 
1055       /* copy over the A part */
1056       Aloc = (Mat_SeqBAIJ*) baij->A->data;
1057       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1058       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1059 
1060       for ( i=0; i<mbs; i++ ) {
1061         rvals[0] = bs*(baij->rstart + i);
1062         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1063         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1064           col = (baij->cstart+aj[j])*bs;
1065           for (k=0; k<bs; k++ ) {
1066             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1067             col++; a += bs;
1068           }
1069         }
1070       }
1071       /* copy over the B part */
1072       Aloc = (Mat_SeqBAIJ*) baij->B->data;
1073       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1074       for ( i=0; i<mbs; i++ ) {
1075         rvals[0] = bs*(baij->rstart + i);
1076         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1077         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1078           col = baij->garray[aj[j]]*bs;
1079           for (k=0; k<bs; k++ ) {
1080             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1081             col++; a += bs;
1082           }
1083         }
1084       }
1085       PetscFree(rvals);
1086       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1087       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1088       /*
1089          Everyone has to call to draw the matrix since the graphics waits are
1090          synchronized across all processors that share the Draw object
1091       */
1092       if (!rank || vtype == DRAW_VIEWER) {
1093         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1094       }
1095       ierr = MatDestroy(A); CHKERRQ(ierr);
1096     }
1097   }
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 
1102 
1103 #undef __FUNC__
1104 #define __FUNC__ "MatView_MPIBAIJ"
1105 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1106 {
1107   int         ierr;
1108   ViewerType  vtype;
1109 
1110   PetscFunctionBegin;
1111   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1112   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
1113       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
1114     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
1115   } else if (vtype == BINARY_FILE_VIEWER) {
1116     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1117   } else {
1118     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1119   }
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNC__
1124 #define __FUNC__ "MatDestroy_MPIBAIJ"
1125 int MatDestroy_MPIBAIJ(Mat mat)
1126 {
1127   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1128   int         ierr;
1129 
1130   PetscFunctionBegin;
1131   if (--mat->refct > 0) PetscFunctionReturn(0);
1132 
1133   if (mat->mapping) {
1134     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
1135   }
1136   if (mat->bmapping) {
1137     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
1138   }
1139   if (mat->rmap) {
1140     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1141   }
1142   if (mat->cmap) {
1143     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1144   }
1145 #if defined(USE_PETSC_LOG)
1146   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
1147 #endif
1148 
1149   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
1150   PetscFree(baij->rowners);
1151   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1152   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1153   if (baij->colmap) PetscFree(baij->colmap);
1154   if (baij->garray) PetscFree(baij->garray);
1155   if (baij->lvec)   VecDestroy(baij->lvec);
1156   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
1157   if (baij->rowvalues) PetscFree(baij->rowvalues);
1158   if (baij->barray) PetscFree(baij->barray);
1159   if (baij->hd) PetscFree(baij->hd);
1160   PetscFree(baij);
1161   PLogObjectDestroy(mat);
1162   PetscHeaderDestroy(mat);
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #undef __FUNC__
1167 #define __FUNC__ "MatMult_MPIBAIJ"
1168 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1169 {
1170   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1171   int         ierr, nt;
1172 
1173   PetscFunctionBegin;
1174   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1175   if (nt != a->n) {
1176     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1177   }
1178   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1179   if (nt != a->m) {
1180     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1181   }
1182   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1183   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1184   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1185   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1186   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNC__
1191 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1192 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1193 {
1194   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1195   int        ierr;
1196 
1197   PetscFunctionBegin;
1198   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1199   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1200   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1201   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNC__
1206 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1207 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1208 {
1209   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1210   int         ierr;
1211 
1212   PetscFunctionBegin;
1213   /* do nondiagonal part */
1214   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1215   /* send it on its way */
1216   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1217   /* do local part */
1218   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1219   /* receive remote parts: note this assumes the values are not actually */
1220   /* inserted in yy until the next line, which is true for my implementation*/
1221   /* but is not perhaps always true. */
1222   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNC__
1227 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1228 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1229 {
1230   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1231   int         ierr;
1232 
1233   PetscFunctionBegin;
1234   /* do nondiagonal part */
1235   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1236   /* send it on its way */
1237   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1238   /* do local part */
1239   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1240   /* receive remote parts: note this assumes the values are not actually */
1241   /* inserted in yy until the next line, which is true for my implementation*/
1242   /* but is not perhaps always true. */
1243   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 /*
1248   This only works correctly for square matrices where the subblock A->A is the
1249    diagonal block
1250 */
1251 #undef __FUNC__
1252 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1253 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1254 {
1255   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1256   int         ierr;
1257 
1258   PetscFunctionBegin;
1259   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1260   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNC__
1265 #define __FUNC__ "MatScale_MPIBAIJ"
1266 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1267 {
1268   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1269   int         ierr;
1270 
1271   PetscFunctionBegin;
1272   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1273   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNC__
1278 #define __FUNC__ "MatGetSize_MPIBAIJ"
1279 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1280 {
1281   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1282 
1283   PetscFunctionBegin;
1284   if (m) *m = mat->M;
1285   if (n) *n = mat->N;
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 #undef __FUNC__
1290 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1291 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1292 {
1293   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1294 
1295   PetscFunctionBegin;
1296   *m = mat->m; *n = mat->n;
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNC__
1301 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1302 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1303 {
1304   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1305 
1306   PetscFunctionBegin;
1307   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1312 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1313 
1314 #undef __FUNC__
1315 #define __FUNC__ "MatGetRow_MPIBAIJ"
1316 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1317 {
1318   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1319   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1320   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1321   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1322   int        *cmap, *idx_p,cstart = mat->cstart;
1323 
1324   PetscFunctionBegin;
1325   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1326   mat->getrowactive = PETSC_TRUE;
1327 
1328   if (!mat->rowvalues && (idx || v)) {
1329     /*
1330         allocate enough space to hold information from the longest row.
1331     */
1332     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1333     int     max = 1,mbs = mat->mbs,tmp;
1334     for ( i=0; i<mbs; i++ ) {
1335       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1336       if (max < tmp) { max = tmp; }
1337     }
1338     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1339     CHKPTRQ(mat->rowvalues);
1340     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1341   }
1342 
1343   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1344   lrow = row - brstart;
1345 
1346   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1347   if (!v)   {pvA = 0; pvB = 0;}
1348   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1349   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1350   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1351   nztot = nzA + nzB;
1352 
1353   cmap  = mat->garray;
1354   if (v  || idx) {
1355     if (nztot) {
1356       /* Sort by increasing column numbers, assuming A and B already sorted */
1357       int imark = -1;
1358       if (v) {
1359         *v = v_p = mat->rowvalues;
1360         for ( i=0; i<nzB; i++ ) {
1361           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1362           else break;
1363         }
1364         imark = i;
1365         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1366         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1367       }
1368       if (idx) {
1369         *idx = idx_p = mat->rowindices;
1370         if (imark > -1) {
1371           for ( i=0; i<imark; i++ ) {
1372             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1373           }
1374         } else {
1375           for ( i=0; i<nzB; i++ ) {
1376             if (cmap[cworkB[i]/bs] < cstart)
1377               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1378             else break;
1379           }
1380           imark = i;
1381         }
1382         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1383         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1384       }
1385     } else {
1386       if (idx) *idx = 0;
1387       if (v)   *v   = 0;
1388     }
1389   }
1390   *nz = nztot;
1391   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1392   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 #undef __FUNC__
1397 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1398 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1399 {
1400   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1401 
1402   PetscFunctionBegin;
1403   if (baij->getrowactive == PETSC_FALSE) {
1404     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1405   }
1406   baij->getrowactive = PETSC_FALSE;
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNC__
1411 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1412 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1413 {
1414   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1415 
1416   PetscFunctionBegin;
1417   *bs = baij->bs;
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNC__
1422 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1423 int MatZeroEntries_MPIBAIJ(Mat A)
1424 {
1425   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1426   int         ierr;
1427 
1428   PetscFunctionBegin;
1429   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1430   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNC__
1435 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1436 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1437 {
1438   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1439   Mat         A = a->A, B = a->B;
1440   int         ierr;
1441   double      isend[5], irecv[5];
1442 
1443   PetscFunctionBegin;
1444   info->block_size     = (double)a->bs;
1445   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1446   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1447   isend[3] = info->memory;  isend[4] = info->mallocs;
1448   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1449   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1450   isend[3] += info->memory;  isend[4] += info->mallocs;
1451   if (flag == MAT_LOCAL) {
1452     info->nz_used      = isend[0];
1453     info->nz_allocated = isend[1];
1454     info->nz_unneeded  = isend[2];
1455     info->memory       = isend[3];
1456     info->mallocs      = isend[4];
1457   } else if (flag == MAT_GLOBAL_MAX) {
1458     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1459     info->nz_used      = irecv[0];
1460     info->nz_allocated = irecv[1];
1461     info->nz_unneeded  = irecv[2];
1462     info->memory       = irecv[3];
1463     info->mallocs      = irecv[4];
1464   } else if (flag == MAT_GLOBAL_SUM) {
1465     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1466     info->nz_used      = irecv[0];
1467     info->nz_allocated = irecv[1];
1468     info->nz_unneeded  = irecv[2];
1469     info->memory       = irecv[3];
1470     info->mallocs      = irecv[4];
1471   }
1472   info->rows_global       = (double)a->M;
1473   info->columns_global    = (double)a->N;
1474   info->rows_local        = (double)a->m;
1475   info->columns_local     = (double)a->N;
1476   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1477   info->fill_ratio_needed = 0;
1478   info->factor_mallocs    = 0;
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNC__
1483 #define __FUNC__ "MatSetOption_MPIBAIJ"
1484 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1485 {
1486   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1487 
1488   PetscFunctionBegin;
1489   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1490       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1491       op == MAT_COLUMNS_UNSORTED ||
1492       op == MAT_COLUMNS_SORTED ||
1493       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1494       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1495         MatSetOption(a->A,op);
1496         MatSetOption(a->B,op);
1497   } else if (op == MAT_ROW_ORIENTED) {
1498         a->roworiented = 1;
1499         MatSetOption(a->A,op);
1500         MatSetOption(a->B,op);
1501   } else if (op == MAT_ROWS_SORTED ||
1502              op == MAT_ROWS_UNSORTED ||
1503              op == MAT_SYMMETRIC ||
1504              op == MAT_STRUCTURALLY_SYMMETRIC ||
1505              op == MAT_YES_NEW_DIAGONALS ||
1506              op == MAT_USE_HASH_TABLE)
1507     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1508   else if (op == MAT_COLUMN_ORIENTED) {
1509     a->roworiented = 0;
1510     MatSetOption(a->A,op);
1511     MatSetOption(a->B,op);
1512   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1513     a->donotstash = 1;
1514   } else if (op == MAT_NO_NEW_DIAGONALS) {
1515     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1516   } else if (op == MAT_USE_HASH_TABLE) {
1517     a->ht_flag = 1;
1518   } else {
1519     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNC__
1525 #define __FUNC__ "MatTranspose_MPIBAIJ("
1526 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1527 {
1528   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1529   Mat_SeqBAIJ *Aloc;
1530   Mat        B;
1531   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1532   int        bs=baij->bs,mbs=baij->mbs;
1533   Scalar     *a;
1534 
1535   PetscFunctionBegin;
1536   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1537   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1538   CHKERRQ(ierr);
1539 
1540   /* copy over the A part */
1541   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1542   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1544 
1545   for ( i=0; i<mbs; i++ ) {
1546     rvals[0] = bs*(baij->rstart + i);
1547     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1548     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1549       col = (baij->cstart+aj[j])*bs;
1550       for (k=0; k<bs; k++ ) {
1551         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1552         col++; a += bs;
1553       }
1554     }
1555   }
1556   /* copy over the B part */
1557   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1558   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1559   for ( i=0; i<mbs; i++ ) {
1560     rvals[0] = bs*(baij->rstart + i);
1561     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1562     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1563       col = baij->garray[aj[j]]*bs;
1564       for (k=0; k<bs; k++ ) {
1565         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1566         col++; a += bs;
1567       }
1568     }
1569   }
1570   PetscFree(rvals);
1571   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1572   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1573 
1574   if (matout != PETSC_NULL) {
1575     *matout = B;
1576   } else {
1577     PetscOps *Abops;
1578     MatOps   Aops;
1579 
1580     /* This isn't really an in-place transpose .... but free data structures from baij */
1581     PetscFree(baij->rowners);
1582     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1583     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1584     if (baij->colmap) PetscFree(baij->colmap);
1585     if (baij->garray) PetscFree(baij->garray);
1586     if (baij->lvec) VecDestroy(baij->lvec);
1587     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1588     PetscFree(baij);
1589 
1590     /*
1591        This is horrible, horrible code. We need to keep the
1592       A pointers for the bops and ops but copy everything
1593       else from C.
1594     */
1595     Abops = A->bops;
1596     Aops  = A->ops;
1597     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1598     A->bops = Abops;
1599     A->ops  = Aops;
1600 
1601     PetscHeaderDestroy(B);
1602   }
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #undef __FUNC__
1607 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1608 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1609 {
1610   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1611   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1612   int ierr,s1,s2,s3;
1613 
1614   PetscFunctionBegin;
1615   if (ll)  {
1616     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1617     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1618     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1619     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1620     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1621   }
1622   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1623   PetscFunctionReturn(0);
1624 }
1625 
1626 #undef __FUNC__
1627 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1628 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1629 {
1630   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1631   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1632   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1633   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1634   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1635   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1636   MPI_Comm       comm = A->comm;
1637   MPI_Request    *send_waits,*recv_waits;
1638   MPI_Status     recv_status,*send_status;
1639   IS             istmp;
1640   PetscTruth     localdiag;
1641 
1642   PetscFunctionBegin;
1643   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
1644   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
1645 
1646   /*  first count number of contributors to each processor */
1647   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
1648   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1649   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
1650   for ( i=0; i<N; i++ ) {
1651     idx = rows[i];
1652     found = 0;
1653     for ( j=0; j<size; j++ ) {
1654       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1655         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1656       }
1657     }
1658     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1659   }
1660   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1661 
1662   /* inform other processors of number of messages and max length*/
1663   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1664   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1665   nrecvs = work[rank];
1666   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1667   nmax   = work[rank];
1668   PetscFree(work);
1669 
1670   /* post receives:   */
1671   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1672   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1673   for ( i=0; i<nrecvs; i++ ) {
1674     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1675   }
1676 
1677   /* do sends:
1678      1) starts[i] gives the starting index in svalues for stuff going to
1679      the ith processor
1680   */
1681   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1682   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1683   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
1684   starts[0] = 0;
1685   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1686   for ( i=0; i<N; i++ ) {
1687     svalues[starts[owner[i]]++] = rows[i];
1688   }
1689   ISRestoreIndices(is,&rows);
1690 
1691   starts[0] = 0;
1692   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1693   count = 0;
1694   for ( i=0; i<size; i++ ) {
1695     if (procs[i]) {
1696       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1697     }
1698   }
1699   PetscFree(starts);
1700 
1701   base = owners[rank]*bs;
1702 
1703   /*  wait on receives */
1704   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
1705   source = lens + nrecvs;
1706   count  = nrecvs; slen = 0;
1707   while (count) {
1708     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1709     /* unpack receives into our local space */
1710     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1711     source[imdex]  = recv_status.MPI_SOURCE;
1712     lens[imdex]  = n;
1713     slen += n;
1714     count--;
1715   }
1716   PetscFree(recv_waits);
1717 
1718   /* move the data into the send scatter */
1719   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
1720   count = 0;
1721   for ( i=0; i<nrecvs; i++ ) {
1722     values = rvalues + i*nmax;
1723     for ( j=0; j<lens[i]; j++ ) {
1724       lrows[count++] = values[j] - base;
1725     }
1726   }
1727   PetscFree(rvalues); PetscFree(lens);
1728   PetscFree(owner); PetscFree(nprocs);
1729 
1730   /* actually zap the local rows */
1731   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1732   PLogObjectParent(A,istmp);
1733 
1734   /*
1735         Zero the required rows. If the "diagonal block" of the matrix
1736      is square and the user wishes to set the diagonal we use seperate
1737      code so that MatSetValues() is not called for each diagonal allocating
1738      new memory, thus calling lots of mallocs and slowing things down.
1739 
1740        Contributed by: Mathew Knepley
1741   */
1742   localdiag = PETSC_FALSE;
1743   if (diag && (l->A->M == l->A->N)) {
1744     localdiag = PETSC_TRUE;
1745     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
1746   } else {
1747     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1748   }
1749   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
1750   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1751 
1752   if (diag && (localdiag == PETSC_FALSE)) {
1753     for ( i = 0; i < slen; i++ ) {
1754       row = lrows[i] + rstart_bs;
1755       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1756     }
1757     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1758     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1759   }
1760   PetscFree(lrows);
1761 
1762   /* wait on sends */
1763   if (nsends) {
1764     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1765     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1766     PetscFree(send_status);
1767   }
1768   PetscFree(send_waits); PetscFree(svalues);
1769 
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 extern int MatPrintHelp_SeqBAIJ(Mat);
1774 #undef __FUNC__
1775 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1776 int MatPrintHelp_MPIBAIJ(Mat A)
1777 {
1778   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1779   MPI_Comm    comm = A->comm;
1780   static int  called = 0;
1781   int         ierr;
1782 
1783   PetscFunctionBegin;
1784   if (!a->rank) {
1785     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1786   }
1787   if (called) {PetscFunctionReturn(0);} else called = 1;
1788   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1789   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 #undef __FUNC__
1794 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1795 int MatSetUnfactored_MPIBAIJ(Mat A)
1796 {
1797   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1798   int         ierr;
1799 
1800   PetscFunctionBegin;
1801   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
1806 
1807 /* -------------------------------------------------------------------*/
1808 static struct _MatOps MatOps_Values = {
1809   MatSetValues_MPIBAIJ,
1810   MatGetRow_MPIBAIJ,
1811   MatRestoreRow_MPIBAIJ,
1812   MatMult_MPIBAIJ,
1813   MatMultAdd_MPIBAIJ,
1814   MatMultTrans_MPIBAIJ,
1815   MatMultTransAdd_MPIBAIJ,
1816   0,
1817   0,
1818   0,
1819   0,
1820   0,
1821   0,
1822   0,
1823   MatTranspose_MPIBAIJ,
1824   MatGetInfo_MPIBAIJ,
1825   0,
1826   MatGetDiagonal_MPIBAIJ,
1827   MatDiagonalScale_MPIBAIJ,
1828   MatNorm_MPIBAIJ,
1829   MatAssemblyBegin_MPIBAIJ,
1830   MatAssemblyEnd_MPIBAIJ,
1831   0,
1832   MatSetOption_MPIBAIJ,
1833   MatZeroEntries_MPIBAIJ,
1834   MatZeroRows_MPIBAIJ,
1835   0,
1836   0,
1837   0,
1838   0,
1839   MatGetSize_MPIBAIJ,
1840   MatGetLocalSize_MPIBAIJ,
1841   MatGetOwnershipRange_MPIBAIJ,
1842   0,
1843   0,
1844   0,
1845   0,
1846   MatConvertSameType_MPIBAIJ,
1847   0,
1848   0,
1849   0,
1850   0,
1851   0,
1852   MatGetSubMatrices_MPIBAIJ,
1853   MatIncreaseOverlap_MPIBAIJ,
1854   MatGetValues_MPIBAIJ,
1855   0,
1856   MatPrintHelp_MPIBAIJ,
1857   MatScale_MPIBAIJ,
1858   0,
1859   0,
1860   0,
1861   MatGetBlockSize_MPIBAIJ,
1862   0,
1863   0,
1864   0,
1865   0,
1866   0,
1867   0,
1868   MatSetUnfactored_MPIBAIJ,
1869   0,
1870   MatSetValuesBlocked_MPIBAIJ,
1871   0,
1872   0,
1873   0,
1874   MatGetMaps_Petsc};
1875 
1876 
1877 #undef __FUNC__
1878 #define __FUNC__ "MatCreateMPIBAIJ"
1879 /*@C
1880    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1881    (block compressed row).  For good matrix assembly performance
1882    the user should preallocate the matrix storage by setting the parameters
1883    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1884    performance can be increased by more than a factor of 50.
1885 
1886    Collective on MPI_Comm
1887 
1888    Input Parameters:
1889 +  comm - MPI communicator
1890 .  bs   - size of blockk
1891 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1892            This value should be the same as the local size used in creating the
1893            y vector for the matrix-vector product y = Ax.
1894 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1895            This value should be the same as the local size used in creating the
1896            x vector for the matrix-vector product y = Ax.
1897 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1898 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1899 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1900            submatrix  (same for all local rows)
1901 .  d_nzz - array containing the number of block nonzeros in the various block rows
1902            of the in diagonal portion of the local (possibly different for each block
1903            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1904 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1905            submatrix (same for all local rows).
1906 -  o_nzz - array containing the number of nonzeros in the various block rows of the
1907            off-diagonal portion of the local submatrix (possibly different for
1908            each block row) or PETSC_NULL.
1909 
1910    Output Parameter:
1911 .  A - the matrix
1912 
1913    Options Database Keys:
1914 .   -mat_no_unroll - uses code that does not unroll the loops in the
1915                      block calculations (much slower)
1916 .   -mat_block_size - size of the blocks to use
1917 
1918    Notes:
1919    The user MUST specify either the local or global matrix dimensions
1920    (possibly both).
1921 
1922    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1923    than it must be used on all processors that share the object for that argument.
1924 
1925    Storage Information:
1926    For a square global matrix we define each processor's diagonal portion
1927    to be its local rows and the corresponding columns (a square submatrix);
1928    each processor's off-diagonal portion encompasses the remainder of the
1929    local matrix (a rectangular submatrix).
1930 
1931    The user can specify preallocated storage for the diagonal part of
1932    the local submatrix with either d_nz or d_nnz (not both).  Set
1933    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1934    memory allocation.  Likewise, specify preallocated storage for the
1935    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1936 
1937    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1938    the figure below we depict these three local rows and all columns (0-11).
1939 
1940 .vb
1941            0 1 2 3 4 5 6 7 8 9 10 11
1942           -------------------
1943    row 3  |  o o o d d d o o o o o o
1944    row 4  |  o o o d d d o o o o o o
1945    row 5  |  o o o d d d o o o o o o
1946           -------------------
1947 .ve
1948 
1949    Thus, any entries in the d locations are stored in the d (diagonal)
1950    submatrix, and any entries in the o locations are stored in the
1951    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1952    stored simply in the MATSEQBAIJ format for compressed row storage.
1953 
1954    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1955    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1956    In general, for PDE problems in which most nonzeros are near the diagonal,
1957    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1958    or you will get TERRIBLE performance; see the users' manual chapter on
1959    matrices.
1960 
1961 .keywords: matrix, block, aij, compressed row, sparse, parallel
1962 
1963 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1964 @*/
1965 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
1966                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1967 {
1968   Mat          B;
1969   Mat_MPIBAIJ  *b;
1970   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1971 
1972   PetscFunctionBegin;
1973   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
1974 
1975   MPI_Comm_size(comm,&size);
1976   if (size == 1) {
1977     if (M == PETSC_DECIDE) M = m;
1978     if (N == PETSC_DECIDE) N = n;
1979     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1980     PetscFunctionReturn(0);
1981   }
1982 
1983   *A = 0;
1984   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
1985   PLogObjectCreate(B);
1986   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
1987   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
1988   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1989 
1990   B->ops->destroy    = MatDestroy_MPIBAIJ;
1991   B->ops->view       = MatView_MPIBAIJ;
1992   B->mapping    = 0;
1993   B->factor     = 0;
1994   B->assembled  = PETSC_FALSE;
1995 
1996   B->insertmode = NOT_SET_VALUES;
1997   MPI_Comm_rank(comm,&b->rank);
1998   MPI_Comm_size(comm,&b->size);
1999 
2000   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
2001     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2002   }
2003   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
2004     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2005   }
2006   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
2007     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2008   }
2009   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2010   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
2011 
2012   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
2013     work[0] = m; work[1] = n;
2014     mbs = m/bs; nbs = n/bs;
2015     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
2016     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
2017     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
2018   }
2019   if (m == PETSC_DECIDE) {
2020     Mbs = M/bs;
2021     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2022     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2023     m   = mbs*bs;
2024   }
2025   if (n == PETSC_DECIDE) {
2026     Nbs = N/bs;
2027     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
2028     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
2029     n   = nbs*bs;
2030   }
2031   if (mbs*bs != m || nbs*bs != n) {
2032     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2033   }
2034 
2035   b->m = m; B->m = m;
2036   b->n = n; B->n = n;
2037   b->N = N; B->N = N;
2038   b->M = M; B->M = M;
2039   b->bs  = bs;
2040   b->bs2 = bs*bs;
2041   b->mbs = mbs;
2042   b->nbs = nbs;
2043   b->Mbs = Mbs;
2044   b->Nbs = Nbs;
2045 
2046   /* the information in the maps duplicates the information computed below, eventually
2047      we should remove the duplicate information that is not contained in the maps */
2048   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2049   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2050 
2051   /* build local table of row and column ownerships */
2052   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
2053   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2054   b->cowners = b->rowners + b->size + 2;
2055   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2056   b->rowners[0] = 0;
2057   for ( i=2; i<=b->size; i++ ) {
2058     b->rowners[i] += b->rowners[i-1];
2059   }
2060   b->rstart    = b->rowners[b->rank];
2061   b->rend      = b->rowners[b->rank+1];
2062   b->rstart_bs = b->rstart * bs;
2063   b->rend_bs   = b->rend * bs;
2064 
2065   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2066   b->cowners[0] = 0;
2067   for ( i=2; i<=b->size; i++ ) {
2068     b->cowners[i] += b->cowners[i-1];
2069   }
2070   b->cstart    = b->cowners[b->rank];
2071   b->cend      = b->cowners[b->rank+1];
2072   b->cstart_bs = b->cstart * bs;
2073   b->cend_bs   = b->cend * bs;
2074 
2075 
2076   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2077   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
2078   PLogObjectParent(B,b->A);
2079   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2080   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
2081   PLogObjectParent(B,b->B);
2082 
2083   /* build cache for off array entries formed */
2084   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
2085   b->donotstash  = 0;
2086   b->colmap      = 0;
2087   b->garray      = 0;
2088   b->roworiented = 1;
2089 
2090   /* stuff used in block assembly */
2091   b->barray       = 0;
2092 
2093   /* stuff used for matrix vector multiply */
2094   b->lvec         = 0;
2095   b->Mvctx        = 0;
2096 
2097   /* stuff for MatGetRow() */
2098   b->rowindices   = 0;
2099   b->rowvalues    = 0;
2100   b->getrowactive = PETSC_FALSE;
2101 
2102   /* hash table stuff */
2103   b->ht           = 0;
2104   b->hd           = 0;
2105   b->ht_size      = 0;
2106   b->ht_flag      = 0;
2107   b->ht_fact      = 0;
2108   b->ht_total_ct  = 0;
2109   b->ht_insert_ct = 0;
2110 
2111   *A = B;
2112   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
2113   if (flg) {
2114     double fact = 1.39;
2115     ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
2116     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
2117     if (fact <= 1.0) fact = 1.39;
2118     ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
2119     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 #undef __FUNC__
2125 #define __FUNC__ "MatConvertSameType_MPIBAIJ"
2126 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
2127 {
2128   Mat         mat;
2129   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2130   int         ierr, len=0, flg;
2131 
2132   PetscFunctionBegin;
2133   *newmat       = 0;
2134   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
2135   PLogObjectCreate(mat);
2136   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2137   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2138   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2139   mat->ops->view       = MatView_MPIBAIJ;
2140   mat->factor     = matin->factor;
2141   mat->assembled  = PETSC_TRUE;
2142 
2143   a->m = mat->m   = oldmat->m;
2144   a->n = mat->n   = oldmat->n;
2145   a->M = mat->M   = oldmat->M;
2146   a->N = mat->N   = oldmat->N;
2147 
2148   a->bs  = oldmat->bs;
2149   a->bs2 = oldmat->bs2;
2150   a->mbs = oldmat->mbs;
2151   a->nbs = oldmat->nbs;
2152   a->Mbs = oldmat->Mbs;
2153   a->Nbs = oldmat->Nbs;
2154 
2155   a->rstart       = oldmat->rstart;
2156   a->rend         = oldmat->rend;
2157   a->cstart       = oldmat->cstart;
2158   a->cend         = oldmat->cend;
2159   a->size         = oldmat->size;
2160   a->rank         = oldmat->rank;
2161   mat->insertmode = NOT_SET_VALUES;
2162   a->rowvalues    = 0;
2163   a->getrowactive = PETSC_FALSE;
2164   a->barray       = 0;
2165 
2166   /* hash table stuff */
2167   a->ht           = 0;
2168   a->hd           = 0;
2169   a->ht_size      = 0;
2170   a->ht_flag      = oldmat->ht_flag;
2171   a->ht_fact      = oldmat->ht_fact;
2172   a->ht_total_ct  = 0;
2173   a->ht_insert_ct = 0;
2174 
2175 
2176   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2177   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2178   a->cowners = a->rowners + a->size + 2;
2179   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2180   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
2181   if (oldmat->colmap) {
2182     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2183     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2184     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2185   } else a->colmap = 0;
2186   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2187     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2188     PLogObjectMemory(mat,len*sizeof(int));
2189     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2190   } else a->garray = 0;
2191 
2192   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2193   PLogObjectParent(mat,a->lvec);
2194   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2195   PLogObjectParent(mat,a->Mvctx);
2196   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
2197   PLogObjectParent(mat,a->A);
2198   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
2199   PLogObjectParent(mat,a->B);
2200   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2201   if (flg) {
2202     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2203   }
2204   *newmat = mat;
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 #include "sys.h"
2209 
2210 #undef __FUNC__
2211 #define __FUNC__ "MatLoad_MPIBAIJ"
2212 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2213 {
2214   Mat          A;
2215   int          i, nz, ierr, j,rstart, rend, fd;
2216   Scalar       *vals,*buf;
2217   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2218   MPI_Status   status;
2219   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2220   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2221   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2222   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2223   int          dcount,kmax,k,nzcount,tmp;
2224 
2225   PetscFunctionBegin;
2226   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2227 
2228   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
2229   if (!rank) {
2230     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2231     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2232     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2233     if (header[3] < 0) {
2234       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2235     }
2236   }
2237 
2238   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2239   M = header[1]; N = header[2];
2240 
2241   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2242 
2243   /*
2244      This code adds extra rows to make sure the number of rows is
2245      divisible by the blocksize
2246   */
2247   Mbs        = M/bs;
2248   extra_rows = bs - M + bs*(Mbs);
2249   if (extra_rows == bs) extra_rows = 0;
2250   else                  Mbs++;
2251   if (extra_rows &&!rank) {
2252     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2253   }
2254 
2255   /* determine ownership of all rows */
2256   mbs = Mbs/size + ((Mbs % size) > rank);
2257   m   = mbs * bs;
2258   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2259   browners = rowners + size + 1;
2260   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2261   rowners[0] = 0;
2262   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2263   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2264   rstart = rowners[rank];
2265   rend   = rowners[rank+1];
2266 
2267   /* distribute row lengths to all processors */
2268   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
2269   if (!rank) {
2270     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2271     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2272     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2273     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2274     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2275     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2276     PetscFree(sndcounts);
2277   } else {
2278     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2279   }
2280 
2281   if (!rank) {
2282     /* calculate the number of nonzeros on each processor */
2283     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2284     PetscMemzero(procsnz,size*sizeof(int));
2285     for ( i=0; i<size; i++ ) {
2286       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2287         procsnz[i] += rowlengths[j];
2288       }
2289     }
2290     PetscFree(rowlengths);
2291 
2292     /* determine max buffer needed and allocate it */
2293     maxnz = 0;
2294     for ( i=0; i<size; i++ ) {
2295       maxnz = PetscMax(maxnz,procsnz[i]);
2296     }
2297     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2298 
2299     /* read in my part of the matrix column indices  */
2300     nz = procsnz[0];
2301     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2302     mycols = ibuf;
2303     if (size == 1)  nz -= extra_rows;
2304     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2305     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2306 
2307     /* read in every ones (except the last) and ship off */
2308     for ( i=1; i<size-1; i++ ) {
2309       nz   = procsnz[i];
2310       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2311       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2312     }
2313     /* read in the stuff for the last proc */
2314     if ( size != 1 ) {
2315       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2316       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2317       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2318       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2319     }
2320     PetscFree(cols);
2321   } else {
2322     /* determine buffer space needed for message */
2323     nz = 0;
2324     for ( i=0; i<m; i++ ) {
2325       nz += locrowlens[i];
2326     }
2327     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2328     mycols = ibuf;
2329     /* receive message of column indices*/
2330     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2331     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2332     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2333   }
2334 
2335   /* loop over local rows, determining number of off diagonal entries */
2336   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2337   odlens = dlens + (rend-rstart);
2338   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2339   PetscMemzero(mask,3*Mbs*sizeof(int));
2340   masked1 = mask    + Mbs;
2341   masked2 = masked1 + Mbs;
2342   rowcount = 0; nzcount = 0;
2343   for ( i=0; i<mbs; i++ ) {
2344     dcount  = 0;
2345     odcount = 0;
2346     for ( j=0; j<bs; j++ ) {
2347       kmax = locrowlens[rowcount];
2348       for ( k=0; k<kmax; k++ ) {
2349         tmp = mycols[nzcount++]/bs;
2350         if (!mask[tmp]) {
2351           mask[tmp] = 1;
2352           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2353           else masked1[dcount++] = tmp;
2354         }
2355       }
2356       rowcount++;
2357     }
2358 
2359     dlens[i]  = dcount;
2360     odlens[i] = odcount;
2361 
2362     /* zero out the mask elements we set */
2363     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2364     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2365   }
2366 
2367   /* create our matrix */
2368   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2369          CHKERRQ(ierr);
2370   A = *newmat;
2371   MatSetOption(A,MAT_COLUMNS_SORTED);
2372 
2373   if (!rank) {
2374     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
2375     /* read in my part of the matrix numerical values  */
2376     nz = procsnz[0];
2377     vals = buf;
2378     mycols = ibuf;
2379     if (size == 1)  nz -= extra_rows;
2380     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2381     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2382 
2383     /* insert into matrix */
2384     jj      = rstart*bs;
2385     for ( i=0; i<m; i++ ) {
2386       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2387       mycols += locrowlens[i];
2388       vals   += locrowlens[i];
2389       jj++;
2390     }
2391     /* read in other processors (except the last one) and ship out */
2392     for ( i=1; i<size-1; i++ ) {
2393       nz   = procsnz[i];
2394       vals = buf;
2395       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2396       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2397     }
2398     /* the last proc */
2399     if ( size != 1 ){
2400       nz   = procsnz[i] - extra_rows;
2401       vals = buf;
2402       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2403       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2404       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2405     }
2406     PetscFree(procsnz);
2407   } else {
2408     /* receive numeric values */
2409     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
2410 
2411     /* receive message of values*/
2412     vals   = buf;
2413     mycols = ibuf;
2414     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2415     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2416     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2417 
2418     /* insert into matrix */
2419     jj      = rstart*bs;
2420     for ( i=0; i<m; i++ ) {
2421       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2422       mycols += locrowlens[i];
2423       vals   += locrowlens[i];
2424       jj++;
2425     }
2426   }
2427   PetscFree(locrowlens);
2428   PetscFree(buf);
2429   PetscFree(ibuf);
2430   PetscFree(rowners);
2431   PetscFree(dlens);
2432   PetscFree(mask);
2433   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2434   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 
2439 
2440 #undef __FUNC__
2441 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2442 /*@
2443    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2444 
2445    Input Parameters:
2446 .  mat  - the matrix
2447 .  fact - factor
2448 
2449    Collective on Mat
2450 
2451   Notes:
2452    This can also be set by the command line option: -mat_use_hash_table fact
2453 
2454 .keywords: matrix, hashtable, factor, HT
2455 
2456 .seealso: MatSetOption()
2457 @*/
2458 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2459 {
2460   Mat_MPIBAIJ *baij;
2461 
2462   PetscFunctionBegin;
2463   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2464   if (mat->type != MATMPIBAIJ) {
2465       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2466   }
2467   baij = (Mat_MPIBAIJ*) mat->data;
2468   baij->ht_fact = fact;
2469   PetscFunctionReturn(0);
2470 }
2471