xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision c3d102fe2dfc19c1832704c537829f04ceb136a2)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
3 
4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
15 
16 /*  UGLY, ugly, ugly
17    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
18    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
19    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
20    converts the entries into single precision and then calls ..._MatScalar() to put them
21    into the single precision data structures.
22 */
23 #if defined(PETSC_USE_MAT_SINGLE)
24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 #else
30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
31 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
33 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
35 #endif
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40 {
41   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       i;
44   PetscScalar    *va,*vb;
45   Vec            vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51 
52   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
53   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
54   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
55 
56   for (i=0; i<A->m; i++){
57     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58   }
59 
60   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
61   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
63 
64   PetscFunctionReturn(0);
65 }
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
77   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 EXTERN_C_BEGIN
83 #undef __FUNCT__
84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
86 {
87   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
92   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
97 /*
98      Local utility routine that creates a mapping from the global column
99    number to the local number in the off-diagonal part of the local
100    storage of the matrix.  This is done in a non-scalable way since the
101    length of colmap equals the global matrix length.
102 */
103 #undef __FUNCT__
104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
/* Builds baij->colmap, mapping a global block column that appears in the
   off-diagonal part B to (local block index)*bs + 1.  The +1 shift lets 0
   (a zeroed array slot or a table miss) mean "block column not present".
   With PETSC_USE_CTABLE the map is a hash table; otherwise it is a dense
   array of length Nbs (global block columns), hence the "non-scalable"
   warning in the comment above. */
105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
106 {
107   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
108   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
109   PetscErrorCode ierr;
110   PetscInt       nbs = B->nbs,i,bs=mat->bs;
111 
112   PetscFunctionBegin;
113 #if defined (PETSC_USE_CTABLE)
    /* NOTE(review): table is created with size baij->nbs while B->nbs
       entries are added; presumably PetscTable grows on demand -- confirm */
114   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
    /* garray[i] is the global block column of local block column i of B */
115   for (i=0; i<nbs; i++){
116     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
117   }
118 #else
119   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
120   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
121   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
123 #endif
124   PetscFunctionReturn(0);
125 }
126 
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) - inlined insertion of
   one scalar entry into the "diagonal" sequential block a = (Mat_SeqBAIJ*)A->data.
   (row,col) are point indices local to this block; the entry lands in block
   (row/bs, col/bs) at intra-block offset (row%bs, col%bs).

   Control flow:
     - binary search (narrowed to <= 3 candidates, then linear scan) for the
       block column within the current block row;
     - if the block exists, add or overwrite the scalar and jump to a_noinsert;
     - otherwise honor a->nonew (1: drop silently, -1/-2: error), and when the
       block row is full reallocate the whole matrix storage enlarged by
       CHUNKSIZE block slots, then shift later entries up and insert a fresh,
       zeroed block.

   Relies on locals declared by the caller: a,aa,ai,aj,aimax,ailen,rp,ap,bap,
   brow,bcol,ridx,cidx,nrow,rmax,low,high,t,_i,ii,N,bs,bs2,ierr.

   Bug fix: the memzero of the CHUNKSIZE gap previously used
   sizeof(PetscScalar); the buffer is sized in MatScalar (see the malloc above
   it and the parallel MatSetValues_SeqBAIJ_B_Private macro), so with
   PETSC_USE_MAT_SINGLE the old code zeroed past the value segment and
   corrupted the already-copied new_j index array.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      /* binary search for the block column within this block row */ \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space: values, then column indices, then row starts */ \
        len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (PetscInt*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        /* was sizeof(PetscScalar): overran the MatScalar-sized value segment */ \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
205 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) - inlined insertion of
   one scalar entry into the "off-diagonal" sequential block
   b = (Mat_SeqBAIJ*)B->data; mirror image of MatSetValues_SeqBAIJ_A_Private
   above, using the b*, bi, bj, bimax, bilen aliases.  Binary-searches the
   block row for block column col/bs; if absent, honors b->nonew, reallocates
   the whole storage by CHUNKSIZE block slots when the row is full, then
   shifts later entries and inserts a zeroed block.  Relies on the same
   caller-declared locals (rp,ap,bap,brow,bcol,ridx,cidx,nrow,rmax,low,high,
   t,_i,ii,N,bs,bs2,ierr).
*/
206 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207 { \
208     brow = row/bs;  \
209     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210     rmax = bimax[brow]; nrow = bilen[brow]; \
211       bcol = col/bs; \
212       ridx = row % bs; cidx = col % bs; \
      /* binary search for the block column within this block row */ \
213       low = 0; high = nrow; \
214       while (high-low > 3) { \
215         t = (low+high)/2; \
216         if (rp[t] > bcol) high = t; \
217         else              low  = t; \
218       } \
219       for (_i=low; _i<high; _i++) { \
220         if (rp[_i] > bcol) break; \
221         if (rp[_i] == bcol) { \
222           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223           if (addv == ADD_VALUES) *bap += value;  \
224           else                    *bap  = value;  \
225           goto b_noinsert; \
226         } \
227       } \
228       if (b->nonew == 1) goto b_noinsert; \
229       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
230       if (nrow >= rmax) { \
231         /* there is no extra room in row, therefore enlarge */ \
232         PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
233         MatScalar *new_a; \
234  \
235         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
236  \
237         /* malloc new storage space */ \
238         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
239         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
240         new_j   = (PetscInt*)(new_a + bs2*new_nz); \
241         new_i   = new_j + new_nz; \
242  \
243         /* copy over old data into new slots */ \
244         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
245         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
247         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
248         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
249         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
250         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
252                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253         /* free up old matrix storage */ \
254         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255         if (!b->singlemalloc) { \
256           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258         } \
259         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
260         b->singlemalloc = PETSC_TRUE; \
261  \
262         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
264         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
265         b->maxnz += bs2*CHUNKSIZE; \
266         b->reallocs++; \
267         b->nz++; \
268       } \
269       N = nrow++ - 1;  \
270       /* shift up all the later entries in this row */ \
271       for (ii=N; ii>=_i; ii--) { \
272         rp[ii+1] = rp[ii]; \
273         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274       } \
275       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276       rp[_i]                      = bcol;  \
277       ap[bs2*_i + bs*cidx + ridx] = value;  \
278       b_noinsert:; \
279     bilen[brow] = nrow; \
280 }
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ"
285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
286 {
287   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       i,N = m*n;
290   MatScalar      *vsingle;
291 
292   PetscFunctionBegin;
293   if (N > b->setvalueslen) {
294     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
295     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
296     b->setvalueslen  = N;
297   }
298   vsingle = b->setvaluescopy;
299 
300   for (i=0; i<N; i++) {
301     vsingle[i] = v[i];
302   }
303   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
310 {
311   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
312   PetscErrorCode ierr;
313   PetscInt       i,N = m*n*b->bs2;
314   MatScalar      *vsingle;
315 
316   PetscFunctionBegin;
317   if (N > b->setvalueslen) {
318     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
319     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
320     b->setvalueslen  = N;
321   }
322   vsingle = b->setvaluescopy;
323   for (i=0; i<N; i++) {
324     vsingle[i] = v[i];
325   }
326   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
333 {
334   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
335   PetscErrorCode ierr;
336   PetscInt       i,N = m*n;
337   MatScalar      *vsingle;
338 
339   PetscFunctionBegin;
340   if (N > b->setvalueslen) {
341     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
342     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
343     b->setvalueslen  = N;
344   }
345   vsingle = b->setvaluescopy;
346   for (i=0; i<N; i++) {
347     vsingle[i] = v[i];
348   }
349   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
355 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
356 {
357   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
358   PetscErrorCode ierr;
359   PetscInt       i,N = m*n*b->bs2;
360   MatScalar      *vsingle;
361 
362   PetscFunctionBegin;
363   if (N > b->setvalueslen) {
364     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
365     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
366     b->setvalueslen  = N;
367   }
368   vsingle = b->setvaluescopy;
369   for (i=0; i<N; i++) {
370     vsingle[i] = v[i];
371   }
372   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 #endif
376 
377 #undef __FUNCT__
378 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
379 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
380 {
381   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
382   MatScalar      value;
383   PetscTruth     roworiented = baij->roworiented;
384   PetscErrorCode ierr;
385   PetscInt       i,j,row,col;
386   PetscInt       rstart_orig=baij->rstart_bs;
387   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
388   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
389 
390   /* Some Variables required in the macro */
391   Mat            A = baij->A;
392   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
393   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
394   MatScalar      *aa=a->a;
395 
396   Mat            B = baij->B;
397   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
398   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
399   MatScalar      *ba=b->a;
400 
401   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
402   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
403   MatScalar      *ap,*bap;
404 
405   PetscFunctionBegin;
406   for (i=0; i<m; i++) {
407     if (im[i] < 0) continue;
408 #if defined(PETSC_USE_DEBUG)
409     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
410 #endif
411     if (im[i] >= rstart_orig && im[i] < rend_orig) {
412       row = im[i] - rstart_orig;
413       for (j=0; j<n; j++) {
414         if (in[j] >= cstart_orig && in[j] < cend_orig){
415           col = in[j] - cstart_orig;
416           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
417           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
418           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_DEBUG)
421         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);}
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!baij->colmap) {
426               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
430             col  = col - 1;
431 #else
432             col = baij->colmap[in[j]/bs] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
435               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
438               B = baij->B;
439               b = (Mat_SeqBAIJ*)(B)->data;
440               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
441               ba=b->a;
442             } else col += in[j]%bs;
443           } else col = in[j];
444           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
445           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
446           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
447         }
448       }
449     } else {
450       if (!baij->donotstash) {
451         if (roworiented) {
452           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
453         } else {
454           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
455         }
456       }
457     }
458   }
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar - inserts/adds bs x bs blocks, with
   im/in given as block indices.  Locally owned block rows are forwarded to
   the sequential diagonal (baij->A) or off-diagonal (baij->B) matrices; when
   the input layout is not already one contiguous block, the block is first
   repacked into the scratch buffer baij->barray.  Off-process block rows go
   to the block stash.
*/
464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
465 {
466   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
467   const MatScalar *value;
468   MatScalar       *barray=baij->barray;
469   PetscTruth      roworiented = baij->roworiented;
470   PetscErrorCode  ierr;
471   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
472   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
473   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
474 
475   PetscFunctionBegin;
  /* lazily allocate the bs*bs scratch block used for repacking */
476   if(!barray) {
477     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
478     baij->barray = barray;
479   }
480 
  /* distance (in MatScalars) between consecutive rows of one block in v */
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487     if (im[i] < 0) continue;
488 #if defined(PETSC_USE_DEBUG)
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     if (im[i] >= rstart && im[i] < rend) {
492       row = im[i] - rstart;
493       for (j=0; j<n; j++) {
494         /* If NumCol = 1 then a copy is not required */
495         if ((roworiented) && (n == 1)) {
496           barray = (MatScalar*)v + i*bs2;
497         } else if((!roworiented) && (m == 1)) {
498           barray = (MatScalar*)v + j*bs2;
499         } else { /* Here a copy is required */
500           if (roworiented) {
501             value = v + i*(stepval+bs)*bs + j*bs;
502           } else {
503             value = v + j*(stepval+bs)*bs + i*bs;
504           }
          /* gather one bs x bs block; its rows are stepval apart in v */
505           for (ii=0; ii<bs; ii++,value+=stepval) {
506             for (jj=0; jj<bs; jj++) {
507               *barray++  = *value++;
508             }
509           }
          /* rewind to the start of the packed block */
510           barray -=bs2;
511         }
512 
513         if (in[j] >= cstart && in[j] < cend){
514           col  = in[j] - cstart;
515           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
516         }
517         else if (in[j] < 0) continue;
518 #if defined(PETSC_USE_DEBUG)
519         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
520 #endif
521         else {
          /* off-diagonal block column: translate global to local via colmap */
522           if (mat->was_assembled) {
523             if (!baij->colmap) {
524               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
525             }
526 
527 #if defined(PETSC_USE_DEBUG)
            /* colmap entries are (local block index)*bs + 1, so (entry-1)
               must be divisible by bs; anything else means corruption */
528 #if defined (PETSC_USE_CTABLE)
529             { PetscInt data;
530               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
531               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
532             }
533 #else
534             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
535 #endif
536 #endif
537 #if defined (PETSC_USE_CTABLE)
538 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
539             col  = (col - 1)/bs;
540 #else
541             col = (baij->colmap[in[j]] - 1)/bs;
542 #endif
            /* unknown block column while assembled: fall back to the
               unassembled form so a new nonzero location can be created */
543             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
544               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
545               col =  in[j];
546             }
547           }
548           else col = in[j];
549           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
550         }
551       }
552     } else {
      /* block row owned elsewhere: queue in the block stash */
553       if (!baij->donotstash) {
554         if (roworiented) {
555           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
556         } else {
557           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
558         }
559       }
560     }
561   }
562   PetscFunctionReturn(0);
563 }
564 
/* Multiplicative hashing (Knuth): HASH_KEY is the fractional part of the
   golden ratio; HASH(size,key,tmp) maps key into [0,size) via the fractional
   part of key*HASH_KEY.  tmp is a caller-supplied PetscReal scratch
   variable, needed because the macro assigns to it. */
565 #define HASH_KEY 0.6180339887
566 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
567 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
568 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
569 #undef __FUNCT__
570 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   MatSetValues_MPIBAIJ_HT_MatScalar - point-wise insertion that locates the
   destination block through the precomputed hash table (baij->ht holds the
   keys, baij->hd the corresponding pointers into block storage) instead of
   searching the BAIJ structure.  New nonzero locations cannot be created
   here: a key missing from the table is an error.  Off-process rows are
   stashed for assembly.
*/
571 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
572 {
573   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
574   PetscTruth     roworiented = baij->roworiented;
575   PetscErrorCode ierr;
576   PetscInt       i,j,row,col;
577   PetscInt       rstart_orig=baij->rstart_bs;
578   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
579   PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
580   PetscReal      tmp;
581   MatScalar      **HD = baij->hd,value;
582 #if defined(PETSC_USE_DEBUG)
583   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
584 #endif
585 
586   PetscFunctionBegin;
587 
588   for (i=0; i<m; i++) {
589 #if defined(PETSC_USE_DEBUG)
590     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
591     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
592 #endif
593       row = im[i];
594     if (row >= rstart_orig && row < rend_orig) {
595       for (j=0; j<n; j++) {
596         col = in[j];
597         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* key encodes the (block row, block col) pair; the +1 keeps 0 free
           as the "empty slot" marker in HT */
598         /* Look up the key in the hash table */
599         key = (row/bs)*Nbs+(col/bs)+1;
600         h1  = HASH(size,key,tmp);
601 
602 
603         idx = h1;
604 #if defined(PETSC_USE_DEBUG)
605         insert_ct++;
606         total_ct++;
        /* linear probing: scan forward from h1, wrapping around once;
           failure to find the key means the slot was not preallocated.
           The DEBUG variant additionally counts probes in total_ct. */
607         if (HT[idx] != key) {
608           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
609           if (idx == size) {
610             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
611             if (idx == h1) {
612               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
613             }
614           }
615         }
616 #else
617         if (HT[idx] != key) {
618           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
619           if (idx == size) {
620             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
621             if (idx == h1) {
622               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
623             }
624           }
625         }
626 #endif
627         /* A HASH table entry is found, so insert the values at the correct address */
628         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
629         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
630       }
631     } else {
      /* row owned by another process: queue for communication at assembly */
632       if (!baij->donotstash) {
633         if (roworiented) {
634           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
635         } else {
636           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
637         }
638       }
639     }
640   }
641 #if defined(PETSC_USE_DEBUG)
642   baij->ht_total_ct = total_ct;
643   baij->ht_insert_ct = insert_ct;
644 #endif
645   PetscFunctionReturn(0);
646 }
647 
648 #undef __FUNCT__
649 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_HT_MatScalar - blocked (bs x bs) insertion via
   the hash table: for each (block row, block col) key the table yields a
   direct pointer HD[idx] into that block's storage, into which the
   user-supplied block is copied or added.  New blocks cannot be created
   here; off-process block rows are stashed.
*/
650 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
651 {
652   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
653   PetscTruth      roworiented = baij->roworiented;
654   PetscErrorCode  ierr;
655   PetscInt        i,j,ii,jj,row,col;
656   PetscInt        rstart=baij->rstart ;
657   PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
658   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
659   PetscReal       tmp;
660   MatScalar       **HD = baij->hd,*baij_a;
661   const MatScalar *v_t,*value;
662 #if defined(PETSC_USE_DEBUG)
663   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
664 #endif
665 
666   PetscFunctionBegin;
667 
  /* distance (in MatScalars) between consecutive rows of one block in v */
668   if (roworiented) {
669     stepval = (n-1)*bs;
670   } else {
671     stepval = (m-1)*bs;
672   }
673   for (i=0; i<m; i++) {
674 #if defined(PETSC_USE_DEBUG)
675     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
676     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
677 #endif
678     row   = im[i];
    /* NOTE(review): v_t = v + i*bs2 advanced by bs per column matches a
       layout with a single block column (n==1); for roworiented n>1 the
       commented-out formulas below imply a different stride -- verify
       against the callers of this routine */
679     v_t   = v + i*bs2;
680     if (row >= rstart && row < rend) {
681       for (j=0; j<n; j++) {
682         col = in[j];
683 
684         /* Look up into the Hash Table */
685         key = row*Nbs+col+1;
686         h1  = HASH(size,key,tmp);
687 
688         idx = h1;
689 #if defined(PETSC_USE_DEBUG)
690         total_ct++;
691         insert_ct++;
        /* linear probing: scan forward from h1, wrapping around once;
           a missing key means the block was not preallocated */
692        if (HT[idx] != key) {
693           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
694           if (idx == size) {
695             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
696             if (idx == h1) {
697               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
698             }
699           }
700         }
701 #else
702         if (HT[idx] != key) {
703           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
704           if (idx == size) {
705             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
706             if (idx == h1) {
707               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
708             }
709           }
710         }
711 #endif
712         baij_a = HD[idx];
713         if (roworiented) {
714           /*value = v + i*(stepval+bs)*bs + j*bs;*/
715           /* value = v + (i*(stepval+bs)+j)*bs; */
716           value = v_t;
717           v_t  += bs;
          /* jj=ii, step bs: scatters the row-major source block into the
             column-major block storage */
718           if (addv == ADD_VALUES) {
719             for (ii=0; ii<bs; ii++,value+=stepval) {
720               for (jj=ii; jj<bs2; jj+=bs) {
721                 baij_a[jj]  += *value++;
722               }
723             }
724           } else {
725             for (ii=0; ii<bs; ii++,value+=stepval) {
726               for (jj=ii; jj<bs2; jj+=bs) {
727                 baij_a[jj]  = *value++;
728               }
729             }
730           }
731         } else {
          /* column-oriented input is already column-major: copy contiguously */
732           value = v + j*(stepval+bs)*bs + i*bs;
733           if (addv == ADD_VALUES) {
734             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
735               for (jj=0; jj<bs; jj++) {
736                 baij_a[jj]  += *value++;
737               }
738             }
739           } else {
740             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
741               for (jj=0; jj<bs; jj++) {
742                 baij_a[jj]  = *value++;
743               }
744             }
745           }
746         }
747       }
748     } else {
      /* block row owned elsewhere: queue in the block stash */
749       if (!baij->donotstash) {
750         if (roworiented) {
751           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
752         } else {
753           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
754         }
755       }
756     }
757   }
758 #if defined(PETSC_USE_DEBUG)
759   baij->ht_total_ct = total_ct;
760   baij->ht_insert_ct = insert_ct;
761 #endif
762   PetscFunctionReturn(0);
763 }
764 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves individual (point-wise, not block-wise)
   entries of a parallel BAIJ matrix.

   Only rows owned by this process may be requested; an off-process row is an
   error (see the SETERRQ at the bottom).  A requested column may lie either in
   the diagonal block (baij->A) or in the off-diagonal block (baij->B); in the
   latter case the global column is translated to B's local numbering through
   the colmap/garray tables.  Entries with no stored value are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;   /* owned point-row range */
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;     /* diagonal-block point-column range */

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      /* local point row within this process's partition */
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column falls in the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: lazily build the global->local column map */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* colmap entries are stored shifted by +1 so 0 means "absent";
             garray cross-check guards against hash/table aliasing */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;   /* point column within B's local numbering */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatNorm_MPIBAIJ"
812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
813 {
814   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
815   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
816   PetscErrorCode ierr;
817   PetscInt       i,bs2=baij->bs2;
818   PetscReal      sum = 0.0;
819   MatScalar      *v;
820 
821   PetscFunctionBegin;
822   if (baij->size == 1) {
823     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
824   } else {
825     if (type == NORM_FROBENIUS) {
826       v = amat->a;
827       for (i=0; i<amat->nz*bs2; i++) {
828 #if defined(PETSC_USE_COMPLEX)
829         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
830 #else
831         sum += (*v)*(*v); v++;
832 #endif
833       }
834       v = bmat->a;
835       for (i=0; i<bmat->nz*bs2; i++) {
836 #if defined(PETSC_USE_COMPLEX)
837         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
838 #else
839         sum += (*v)*(*v); v++;
840 #endif
841       }
842       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
843       *nrm = sqrt(*nrm);
844     } else {
845       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
846     }
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 
852 /*
853   Creates the hash table, and sets the table
854   This table is created only once.
   If new entries need to be added to the matrix
856   then the hash table has to be destroyed and
857   recreated.
858 */
859 #undef __FUNCT__
860 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
861 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
862 {
863   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
864   Mat            A = baij->A,B=baij->B;
865   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
866   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
867   PetscErrorCode ierr;
868   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
869   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
870   PetscInt       *HT,key;
871   MatScalar      **HD;
872   PetscReal      tmp;
873 #if defined(PETSC_USE_DEBUG)
874   PetscInt       ct=0,max=0;
875 #endif
876 
877   PetscFunctionBegin;
878   baij->ht_size=(PetscInt)(factor*nz);
879   size = baij->ht_size;
880 
881   if (baij->ht) {
882     PetscFunctionReturn(0);
883   }
884 
885   /* Allocate Memory for Hash Table */
886   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
887   baij->ht = (PetscInt*)(baij->hd + size);
888   HD       = baij->hd;
889   HT       = baij->ht;
890 
891 
892   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
893 
894 
895   /* Loop Over A */
896   for (i=0; i<a->mbs; i++) {
897     for (j=ai[i]; j<ai[i+1]; j++) {
898       row = i+rstart;
899       col = aj[j]+cstart;
900 
901       key = row*Nbs + col + 1;
902       h1  = HASH(size,key,tmp);
903       for (k=0; k<size; k++){
904         if (!HT[(h1+k)%size]) {
905           HT[(h1+k)%size] = key;
906           HD[(h1+k)%size] = a->a + j*bs2;
907           break;
908 #if defined(PETSC_USE_DEBUG)
909         } else {
910           ct++;
911 #endif
912         }
913       }
914 #if defined(PETSC_USE_DEBUG)
915       if (k> max) max = k;
916 #endif
917     }
918   }
919   /* Loop Over B */
920   for (i=0; i<b->mbs; i++) {
921     for (j=bi[i]; j<bi[i+1]; j++) {
922       row = i+rstart;
923       col = garray[bj[j]];
924       key = row*Nbs + col + 1;
925       h1  = HASH(size,key,tmp);
926       for (k=0; k<size; k++){
927         if (!HT[(h1+k)%size]) {
928           HT[(h1+k)%size] = key;
929           HD[(h1+k)%size] = b->a + j*bs2;
930           break;
931 #if defined(PETSC_USE_DEBUG)
932         } else {
933           ct++;
934 #endif
935         }
936       }
937 #if defined(PETSC_USE_DEBUG)
938       if (k> max) max = k;
939 #endif
940     }
941   }
942 
943   /* Print Summary */
944 #if defined(PETSC_USE_DEBUG)
945   for (i=0,j=0; i<size; i++) {
946     if (HT[i]) {j++;}
947   }
948   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
949 #endif
950   PetscFunctionReturn(0);
951 }
952 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
/*
   MatAssemblyBegin_MPIBAIJ - Starts the assembly of a parallel BAIJ matrix:
   verifies that all processes used a compatible insert mode and begins
   scattering the stashed off-process entries (both the point stash and the
   block stash) to their owning processes.

   Collective on mat->comm; returns immediately if stashing was disabled
   with MAT_IGNORE_OFF_PROC_ENTRIES.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* (bitwise OR across ranks: a mixed result equals ADD_VALUES|INSERT_VALUES) */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* start sending stashed entries; the point stash is partitioned by point
     rows (rowners_bs), the block stash by block rows (rowners) */
  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
982 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes the assembly started by
   MatAssemblyBegin_MPIBAIJ(): drains both stashes into the local blocks,
   assembles the sequential diagonal (A) and off-diagonal (B) blocks,
   coordinates disassembly across ranks, sets up the scatter for MatMult on
   first final assembly, and optionally builds the hash table used by the
   _HT MatSetValues variants.

   Collective on mat->comm.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: receive messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 values, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the lvec/Mvctx scatter used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  /* report and reset the hash-table search statistics gathered by the _HT setters */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested via MAT_USE_HASH_TABLE, build the table and switch the
     setvalues ops to the hash-table fast paths */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* MatGetRow work space is invalidated by assembly; free it */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
1084 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Viewing kernel shared by the ASCII,
   draw, socket and binary viewers.

   ASCII INFO/INFO_DETAIL/FACTOR_INFO formats print summary data and return
   early.  Otherwise, on one process the local block is viewed directly; in
   parallel the entire matrix is gathered (entry by entry) onto process 0 in
   a temporary MPIBAIJ matrix and viewed through the singleton viewer.
   Collective on mat->comm.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process storage statistics; nz counts are scaled by bs since
         MatGetInfo reports block counts */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* a null draw context means graphics are disabled; nothing to do */
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      /* process 0 owns all rows of the gathered copy; others own none */
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* one column of the bs x bs block at a time (values are column-major) */
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps B's local block column to the global block column */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatView_MPIBAIJ"
1203 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1204 {
1205   PetscErrorCode ierr;
1206   PetscTruth     iascii,isdraw,issocket,isbinary;
1207 
1208   PetscFunctionBegin;
1209   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1210   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1211   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1212   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1213   if (iascii || isdraw || issocket || isbinary) {
1214     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1215   } else {
1216     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1217   }
1218   PetscFunctionReturn(0);
1219 }
1220 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees all storage owned by a parallel BAIJ matrix:
   the stashes, the local sequential blocks A and B, the column map and
   garray, the scatter context and its work vector, the MatGetRow and
   hash-table work space, and the composed function slots.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* hd and ht were allocated in one chunk; freeing hd releases both */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  /* clear the composed type-specific functions so the slots cannot dangle */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatMult_MPIBAIJ"
1265 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1266 {
1267   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1268   PetscErrorCode ierr;
1269   PetscInt       nt;
1270 
1271   PetscFunctionBegin;
1272   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1273   if (nt != A->n) {
1274     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1275   }
1276   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1277   if (nt != A->m) {
1278     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1279   }
1280   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1281   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1282   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1283   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1284   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
/*
   MatMultAdd_MPIBAIJ - Computes zz = A*xx + yy, overlapping the scatter of
   the off-process entries of xx with the diagonal-block multiply-add.
*/
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* zz = (diag block)*xx + yy while the scatter is in flight */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* zz += (off-diag block)*lvec */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1302 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
/*
   MatMultTranspose_MPIBAIJ - Computes yy = A^T * xx.

   The off-diagonal transpose product is formed into a->lvec and scattered
   (reversed, with addition) back to the owning processes.  The order of the
   local multiply relative to VecScatterBegin depends on whether the scatter
   is "merged" (begin and end combined), so the two cases are sequenced
   differently below.
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1333 
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
/*
   MatMultTransposeAdd_MPIBAIJ - Computes zz = A^T * xx + yy, overlapping the
   reverse scatter of the off-diagonal transpose product with the local
   multiply-add.
*/
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1354 
1355 /*
1356   This only works correctly for square matrices where the subblock A->A is the
1357    diagonal block
1358 */
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1361 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1362 {
1363   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1364   PetscErrorCode ierr;
1365 
1366   PetscFunctionBegin;
1367   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1368   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatScale_MPIBAIJ"
1374 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1375 {
1376   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1377   PetscErrorCode ierr;
1378 
1379   PetscFunctionBegin;
1380   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1381   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1382   PetscFunctionReturn(0);
1383 }
1384 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally owned row (global point row) of a
   parallel BAIJ matrix as arrays of global column indices and values.

   The row is fetched from both sequential blocks and merged into
   column-sorted order, exploiting that A's and B's rows are already sorted:
   B's columns that precede the diagonal block come first, then all of A's
   columns, then B's remaining columns.  Work buffers (rowvalues/rowindices)
   are allocated once, sized for the longest local row, and reused until
   assembly frees them.  Must be paired with MatRestoreRow_MPIBAIJ().
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  PetscInt       *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* tmp = number of nonzero blocks in local block row i (A plus B) */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, then the index array behind them */
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;

  /* only request from A/B what the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      /* empty row */
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1469 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1470 {
1471   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1472 
1473   PetscFunctionBegin;
1474   if (!baij->getrowactive) {
1475     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1476   }
1477   baij->getrowactive = PETSC_FALSE;
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1483 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1484 {
1485   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1486   PetscErrorCode ierr;
1487 
1488   PetscFunctionBegin;
1489   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1490   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1496 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1497 {
1498   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1499   Mat            A = a->A,B = a->B;
1500   PetscErrorCode ierr;
1501   PetscReal      isend[5],irecv[5];
1502 
1503   PetscFunctionBegin;
1504   info->block_size     = (PetscReal)matin->bs;
1505   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1506   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1507   isend[3] = info->memory;  isend[4] = info->mallocs;
1508   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1509   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1510   isend[3] += info->memory;  isend[4] += info->mallocs;
1511   if (flag == MAT_LOCAL) {
1512     info->nz_used      = isend[0];
1513     info->nz_allocated = isend[1];
1514     info->nz_unneeded  = isend[2];
1515     info->memory       = isend[3];
1516     info->mallocs      = isend[4];
1517   } else if (flag == MAT_GLOBAL_MAX) {
1518     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1519     info->nz_used      = irecv[0];
1520     info->nz_allocated = irecv[1];
1521     info->nz_unneeded  = irecv[2];
1522     info->memory       = irecv[3];
1523     info->mallocs      = irecv[4];
1524   } else if (flag == MAT_GLOBAL_SUM) {
1525     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1526     info->nz_used      = irecv[0];
1527     info->nz_allocated = irecv[1];
1528     info->nz_unneeded  = irecv[2];
1529     info->memory       = irecv[3];
1530     info->mallocs      = irecv[4];
1531   } else {
1532     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1533   }
1534   info->rows_global       = (PetscReal)A->M;
1535   info->columns_global    = (PetscReal)A->N;
1536   info->rows_local        = (PetscReal)A->m;
1537   info->columns_local     = (PetscReal)A->N;
1538   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1539   info->fill_ratio_needed = 0;
1540   info->factor_mallocs    = 0;
1541   PetscFunctionReturn(0);
1542 }
1543 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
/*
   MatSetOption_MPIBAIJ - Sets an option on an MPIBAIJ matrix.

   Options that affect the stored entries are forwarded to both local
   sequential blocks (diagonal a->A and off-diagonal a->B); options that
   only concern the parallel layer (stashing, hash table) are recorded in
   the parallel data structure itself.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* pass straight through to both local blocks */
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* record orientation for the parallel MatSetValues path as well */
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* entries destined for other processes are dropped instead of stashed */
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    /* forwarded only to the diagonal block */
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    /* deliberately a no-op */
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
1602 
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1605 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1606 {
1607   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1608   Mat_SeqBAIJ    *Aloc;
1609   Mat            B;
1610   PetscErrorCode ierr;
1611   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1612   PetscInt       bs=A->bs,mbs=baij->mbs;
1613   MatScalar      *a;
1614 
1615   PetscFunctionBegin;
1616   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1617   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1618   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1619   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1620 
1621   /* copy over the A part */
1622   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1623   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1624   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1625 
1626   for (i=0; i<mbs; i++) {
1627     rvals[0] = bs*(baij->rstart + i);
1628     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1629     for (j=ai[i]; j<ai[i+1]; j++) {
1630       col = (baij->cstart+aj[j])*bs;
1631       for (k=0; k<bs; k++) {
1632         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1633         col++; a += bs;
1634       }
1635     }
1636   }
1637   /* copy over the B part */
1638   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1639   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1640   for (i=0; i<mbs; i++) {
1641     rvals[0] = bs*(baij->rstart + i);
1642     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1643     for (j=ai[i]; j<ai[i+1]; j++) {
1644       col = baij->garray[aj[j]]*bs;
1645       for (k=0; k<bs; k++) {
1646         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1647         col++; a += bs;
1648       }
1649     }
1650   }
1651   ierr = PetscFree(rvals);CHKERRQ(ierr);
1652   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1653   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1654 
1655   if (matout) {
1656     *matout = B;
1657   } else {
1658     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1659   }
1660   PetscFunctionReturn(0);
1661 }
1662 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll)*mat*diag(rr).

   Either ll or rr may be PETSC_NULL, in which case that scaling is skipped.
   The scatter of rr into baij->lvec (needed to right scale the off-diagonal
   block, whose columns are non-local) is started before the diagonal block
   is scaled so that communication overlaps computation.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling uses only local rows, so no communication is needed */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1696 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the (global, unblocked) rows listed in is,
   optionally placing *diag on the diagonal of each zeroed row.

   The row indices in is may refer to rows owned by any process, so the
   routine first routes each index to its owning process with point-to-point
   messages; each process then zeros its own rows in the local diagonal
   (l->A) and off-diagonal (l->B) blocks.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
  PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[2*j] counts rows destined for process j; nprocs[2*j+1] is a 0/1
     flag recording whether any message goes to process j at all */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* resume the owner search from the previous hit; restart from process 0
       only when the indices are not ascending */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      /* owners[] holds block-row boundaries; the indices in is are unblocked */
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack the row indices grouped by destination process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[] (the packing loop above advanced its entries) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first unblocked row owned by this process */
  base = owners[rank]*bs;

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* translate the received global row numbers to local row numbers */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero the rows, then insert each diagonal
       entry individually (requires that new nonzeros may be allocated) */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1861 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1862 {
1863   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1864   MPI_Comm          comm = A->comm;
1865   static PetscTruth called = PETSC_FALSE;
1866   PetscErrorCode    ierr;
1867 
1868   PetscFunctionBegin;
1869   if (!a->rank) {
1870     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1871   }
1872   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1873   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1874   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1880 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1881 {
1882   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1883   PetscErrorCode ierr;
1884 
1885   PetscFunctionBegin;
1886   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatEqual_MPIBAIJ"
1894 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1895 {
1896   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1897   Mat            a,b,c,d;
1898   PetscTruth     flg;
1899   PetscErrorCode ierr;
1900 
1901   PetscFunctionBegin;
1902   a = matA->A; b = matA->B;
1903   c = matB->A; d = matB->B;
1904 
1905   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1906   if (flg) {
1907     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1908   }
1909   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
/*
   MatSetUpPreallocation_MPIBAIJ - Fallback preallocation used when the user
   starts assembling without calling MatMPIBAIJSetPreallocation(): block
   size 1 and the default per-row nonzero estimates.
*/
PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1924 
1925 /* -------------------------------------------------------------------*/
/*
   Virtual function table for the MPIBAIJ matrix type.  The numbered
   comments give the slot index into struct _MatOps; a 0 entry means the
   operation is not implemented by this type.
*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
/* 4*/ MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIBAIJ,
/*15*/ MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
/*20*/ MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       0,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
/*25*/ MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       0,
/*45*/ MatPrintHelp_MPIBAIJ,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPIBAIJ,
       0,
       MatSetValuesBlocked_MPIBAIJ,
/*60*/ 0,
       MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMax_MPIBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_MPIBAIJ,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};
2026 
2027 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
/*
   MatGetDiagonalBlock_MPIBAIJ - Returns this process' diagonal block (the
   sequential BAIJ matrix stored in the parallel data structure).

   The block is returned by reference, not copied, so *iscopy is set to
   PETSC_FALSE; the reuse argument is not consulted.
*/
PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
2039 
2040 EXTERN_C_BEGIN
2041 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
2042 EXTERN_C_END
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2046 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2047 {
2048   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2049   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2050   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2051   const PetscInt *JJ;
2052   PetscScalar    *values;
2053   PetscErrorCode ierr;
2054 
2055   PetscFunctionBegin;
2056 #if defined(PETSC_OPT_g)
2057   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2058 #endif
2059   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2060   o_nnz = d_nnz + m;
2061 
2062   for (i=0; i<m; i++) {
2063     nnz     = I[i+1]- I[i];
2064     JJ      = J + I[i];
2065     nnz_max = PetscMax(nnz_max,nnz);
2066 #if defined(PETSC_OPT_g)
2067     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2068 #endif
2069     for (j=0; j<nnz; j++) {
2070       if (*JJ >= cstart) break;
2071       JJ++;
2072     }
2073     d = 0;
2074     for (; j<nnz; j++) {
2075       if (*JJ++ >= cend) break;
2076       d++;
2077     }
2078     d_nnz[i] = d;
2079     o_nnz[i] = nnz - d;
2080   }
2081   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2082   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2083 
2084   if (v) values = (PetscScalar*)v;
2085   else {
2086     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2087     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2088   }
2089 
2090   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2091   for (i=0; i<m; i++) {
2092     ii   = i + rstart;
2093     nnz  = I[i+1]- I[i];
2094     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2095   }
2096   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2097   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2098   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2099 
2100   if (!v) {
2101     ierr = PetscFree(values);CHKERRQ(ierr);
2102   }
2103   PetscFunctionReturn(0);
2104 }
2105 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local block row (starts with zero)
.  j - the block column indices for each local block row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix (bs*bs scalars per block entry)

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation when the type provides
     one; silently does nothing otherwise */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2137 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
/*
   MatMPIBAIJSetPreallocation_MPIBAIJ - Type-specific implementation of
   MatMPIBAIJSetPreallocation(): validates the block size and nonzero
   estimates, settles the parallel layout, builds the ownership tables, and
   creates the local sequential diagonal (b->A) and off-diagonal (b->B)
   blocks plus the blocked stash.
*/
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  /* command-line option may override the requested block size */
  ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
  /* NOTE(review): the messages below say "less than -1" but the tests
     reject any value < 0 -- the message text looks stale; confirm intent */
  if (d_nnz) {
  for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  /* resolve PETSC_DECIDE sizes respecting the block size and create maps */
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  b = (Mat_MPIBAIJ*)B->data;
  B->bs  = bs;
  b->bs2 = bs*bs;
  /* sizes measured in blocks rather than scalars */
  b->mbs = B->m/bs;
  b->nbs = B->n/bs;
  b->Mbs = B->M/bs;
  b->Nbs = B->N/bs;

  /* rowners[i] = first block row owned by process i (prefix sums of mbs) */
  ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];

  /* same construction for block column ownership */
  ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];

  /* ownership ranges in scalar (unblocked) units */
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;
  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  /* diagonal block: local rows x local columns */
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
  /* off-diagonal block: local rows x all global columns */
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  /* stash for blocked insertions destined for other processes */
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
2218 
2219 EXTERN_C_BEGIN
2220 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2221 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2222 EXTERN_C_END
2223 
2224 /*MC
2225    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2226 
2227    Options Database Keys:
2228 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2229 
2230   Level: beginner
2231 
2232 .seealso: MatCreateMPIBAIJ
2233 M*/
2234 
2235 EXTERN_C_BEGIN
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatCreate_MPIBAIJ"
2238 PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2239 {
2240   Mat_MPIBAIJ    *b;
2241   PetscErrorCode ierr;
2242   PetscTruth     flg;
2243 
2244   PetscFunctionBegin;
2245   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2246   B->data = (void*)b;
2247 
2248   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2249   B->mapping    = 0;
2250   B->factor     = 0;
2251   B->assembled  = PETSC_FALSE;
2252 
2253   B->insertmode = NOT_SET_VALUES;
2254   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2255   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2256 
2257   /* build local table of row and column ownerships */
2258   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2259   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2260   b->cowners    = b->rowners + b->size + 2;
2261   b->rowners_bs = b->cowners + b->size + 2;
2262 
2263   /* build cache for off array entries formed */
2264   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2265   b->donotstash  = PETSC_FALSE;
2266   b->colmap      = PETSC_NULL;
2267   b->garray      = PETSC_NULL;
2268   b->roworiented = PETSC_TRUE;
2269 
2270 #if defined(PETSC_USE_MAT_SINGLE)
2271   /* stuff for MatSetValues_XXX in single precision */
2272   b->setvalueslen     = 0;
2273   b->setvaluescopy    = PETSC_NULL;
2274 #endif
2275 
2276   /* stuff used in block assembly */
2277   b->barray       = 0;
2278 
2279   /* stuff used for matrix vector multiply */
2280   b->lvec         = 0;
2281   b->Mvctx        = 0;
2282 
2283   /* stuff for MatGetRow() */
2284   b->rowindices   = 0;
2285   b->rowvalues    = 0;
2286   b->getrowactive = PETSC_FALSE;
2287 
2288   /* hash table stuff */
2289   b->ht           = 0;
2290   b->hd           = 0;
2291   b->ht_size      = 0;
2292   b->ht_flag      = PETSC_FALSE;
2293   b->ht_fact      = 0;
2294   b->ht_total_ct  = 0;
2295   b->ht_insert_ct = 0;
2296 
2297   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2298   if (flg) {
2299     PetscReal fact = 1.39;
2300     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2301     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2302     if (fact <= 1.0) fact = 1.39;
2303     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2304     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2305   }
2306   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2307                                      "MatStoreValues_MPIBAIJ",
2308                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2309   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2310                                      "MatRetrieveValues_MPIBAIJ",
2311                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2312   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2313                                      "MatGetDiagonalBlock_MPIBAIJ",
2314                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2315   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2316                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2317                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2318   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2319 				     "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2320 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2321   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2322                                      "MatDiagonalScaleLocal_MPIBAIJ",
2323                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2324   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2325                                      "MatSetHashTableFactor_MPIBAIJ",
2326                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2327   PetscFunctionReturn(0);
2328 }
2329 EXTERN_C_END
2330 
2331 /*MC
2332    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2333 
2334    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2335    and MATMPIBAIJ otherwise.
2336 
2337    Options Database Keys:
2338 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2339 
2340   Level: beginner
2341 
2342 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2343 M*/
2344 
2345 EXTERN_C_BEGIN
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatCreate_BAIJ"
2348 PetscErrorCode MatCreate_BAIJ(Mat A)
2349 {
2350   PetscErrorCode ierr;
2351   PetscMPIInt    size;
2352 
2353   PetscFunctionBegin;
2354   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2355   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2356   if (size == 1) {
2357     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2358   } else {
2359     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2360   }
2361   PetscFunctionReturn(0);
2362 }
2363 EXTERN_C_END
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2367 /*@C
2368    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2369    (block compressed row).  For good matrix assembly performance
2370    the user should preallocate the matrix storage by setting the parameters
2371    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2372    performance can be increased by more than a factor of 50.
2373 
2374    Collective on Mat
2375 
2376    Input Parameters:
2377 +  A - the matrix
.  bs   - size of block
2379 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2380            submatrix  (same for all local rows)
2381 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
2383            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2384 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2385            submatrix (same for all local rows).
2386 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2387            off-diagonal portion of the local submatrix (possibly different for
2388            each block row) or PETSC_NULL.
2389 
2390    If the *_nnz parameter is given then the *_nz parameter is ignored
2391 
2392    Options Database Keys:
2393 .   -mat_no_unroll - uses code that does not unroll the loops in the
2394                      block calculations (much slower)
2395 .   -mat_block_size - size of the blocks to use
2396 
2397    Notes:
2398    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2399    than it must be used on all processors that share the object for that argument.
2400 
2401    Storage Information:
2402    For a square global matrix we define each processor's diagonal portion
2403    to be its local rows and the corresponding columns (a square submatrix);
2404    each processor's off-diagonal portion encompasses the remainder of the
2405    local matrix (a rectangular submatrix).
2406 
2407    The user can specify preallocated storage for the diagonal part of
2408    the local submatrix with either d_nz or d_nnz (not both).  Set
2409    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2410    memory allocation.  Likewise, specify preallocated storage for the
2411    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2412 
2413    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2414    the figure below we depict these three local rows and all columns (0-11).
2415 
2416 .vb
2417            0 1 2 3 4 5 6 7 8 9 10 11
2418           -------------------
2419    row 3  |  o o o d d d o o o o o o
2420    row 4  |  o o o d d d o o o o o o
2421    row 5  |  o o o d d d o o o o o o
2422           -------------------
2423 .ve
2424 
2425    Thus, any entries in the d locations are stored in the d (diagonal)
2426    submatrix, and any entries in the o locations are stored in the
2427    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2428    stored simply in the MATSEQBAIJ format for compressed row storage.
2429 
2430    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2431    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2432    In general, for PDE problems in which most nonzeros are near the diagonal,
2433    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2434    or you will get TERRIBLE performance; see the users' manual chapter on
2435    matrices.
2436 
2437    Level: intermediate
2438 
2439 .keywords: matrix, block, aij, compressed row, sparse, parallel
2440 
2441 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2442 @*/
2443 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2444 {
2445   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2446 
2447   PetscFunctionBegin;
2448   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2449   if (f) {
2450     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2451   }
2452   PetscFunctionReturn(0);
2453 }
2454 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* in parallel both the diagonal and off-diagonal parts are preallocated;
     a one-process communicator uses the sequential format, which only has d_nz/d_nnz */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2566 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Duplicates an MPIBAIJ matrix: creates a new matrix
   of the same type and sizes, copies all parallel-layout bookkeeping
   (ownership ranges, column map, garray, scatter context) and duplicates the
   two sequential submatrices A and B (presumably the diagonal and
   off-diagonal parts; see MatMPIBAIJGetSeqBAIJ), copying numerical values
   only when cpvalues requests it.
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  /* create an empty matrix of the same type/size, then copy the whole
     function table so the duplicate dispatches exactly like the original */
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size related counts (mbs/nbs/Mbs/Nbs appear to be sizes in blocks) */
  a      = (Mat_MPIBAIJ*)mat->data;
  mat->bs  = matin->bs;
  a->bs2   = oldmat->bs2;
  a->mbs   = oldmat->mbs;
  a->nbs   = oldmat->nbs;
  a->Mbs   = oldmat->Mbs;
  a->Nbs   = oldmat->Nbs;

  /* copy ownership ranges and per-process info; row-access scratch state
     (rowindices/rowvalues/getrowactive/barray) is reset, not copied */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff: the table itself is not copied, only its options;
     counters start at zero for the new matrix */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
  /* duplicate the (optional) map from global block columns to local columns of B */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
  ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* duplicate garray, sized by the number of block columns of B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  /* duplicate the local work vector and the scatter used for matrix-vector products */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  /* duplicate the two sequential submatrices; cpvalues controls value copying */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}
2654 
2655 #include "petscsys.h"
2656 
2657 #undef __FUNCT__
2658 #define __FUNCT__ "MatLoad_MPIBAIJ"
2659 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2660 {
2661   Mat            A;
2662   PetscErrorCode ierr;
2663   int            fd;
2664   PetscInt       i,nz,j,rstart,rend;
2665   PetscScalar    *vals,*buf;
2666   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2667   MPI_Status     status;
2668   PetscMPIInt    rank,size,maxnz;
2669   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2670   PetscInt       *locrowlens,*procsnz = 0,*browners;
2671   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2672   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2673   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2674   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2675 
2676   PetscFunctionBegin;
2677   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2678 
2679   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2680   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2681   if (!rank) {
2682     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2683     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2684     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2685   }
2686 
2687   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2688   M = header[1]; N = header[2];
2689 
2690   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2691 
2692   /*
2693      This code adds extra rows to make sure the number of rows is
2694      divisible by the blocksize
2695   */
2696   Mbs        = M/bs;
2697   extra_rows = bs - M + bs*Mbs;
2698   if (extra_rows == bs) extra_rows = 0;
2699   else                  Mbs++;
2700   if (extra_rows && !rank) {
2701     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2702   }
2703 
2704   /* determine ownership of all rows */
2705   mbs        = Mbs/size + ((Mbs % size) > rank);
2706   m          = mbs*bs;
2707   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2708   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2709 
2710   /* process 0 needs enough room for process with most rows */
2711   if (!rank) {
2712     mmax = rowners[1];
2713     for (i=2; i<size; i++) {
2714       mmax = PetscMax(mmax,rowners[i]);
2715     }
2716   } else mmax = m;
2717 
2718   rowners[0] = 0;
2719   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2720   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2721   rstart = rowners[rank];
2722   rend   = rowners[rank+1];
2723 
2724   /* distribute row lengths to all processors */
2725   ierr = PetscMalloc(mmax*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2726   if (!rank) {
2727     mend = m;
2728     if (size == 1) mend = mend - extra_rows;
2729     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2730     for (j=mend; j<m; j++) locrowlens[j] = 1;
2731     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2732     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2733     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2734     for (j=0; j<m; j++) {
2735       procsnz[0] += locrowlens[j];
2736     }
2737     for (i=1; i<size; i++) {
2738       mend = browners[i+1] - browners[i];
2739       if (i == size-1) mend = mend - extra_rows;
2740       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2741       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2742       /* calculate the number of nonzeros on each processor */
2743       for (j=0; j<browners[i+1]-browners[i]; j++) {
2744         procsnz[i] += rowlengths[j];
2745       }
2746       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2747     }
2748     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2749   } else {
2750     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2751   }
2752 
2753   if (!rank) {
2754     /* determine max buffer needed and allocate it */
2755     maxnz = procsnz[0];
2756     for (i=1; i<size; i++) {
2757       maxnz = PetscMax(maxnz,procsnz[i]);
2758     }
2759     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2760 
2761     /* read in my part of the matrix column indices  */
2762     nz     = procsnz[0];
2763     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2764     mycols = ibuf;
2765     if (size == 1)  nz -= extra_rows;
2766     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2767     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2768 
2769     /* read in every ones (except the last) and ship off */
2770     for (i=1; i<size-1; i++) {
2771       nz   = procsnz[i];
2772       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2773       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2774     }
2775     /* read in the stuff for the last proc */
2776     if (size != 1) {
2777       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2778       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2779       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2780       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2781     }
2782     ierr = PetscFree(cols);CHKERRQ(ierr);
2783   } else {
2784     /* determine buffer space needed for message */
2785     nz = 0;
2786     for (i=0; i<m; i++) {
2787       nz += locrowlens[i];
2788     }
2789     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2790     mycols = ibuf;
2791     /* receive message of column indices*/
2792     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2793     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2794     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2795   }
2796 
2797   /* loop over local rows, determining number of off diagonal entries */
2798   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2799   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2800   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2801   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2802   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2803   rowcount = 0; nzcount = 0;
2804   for (i=0; i<mbs; i++) {
2805     dcount  = 0;
2806     odcount = 0;
2807     for (j=0; j<bs; j++) {
2808       kmax = locrowlens[rowcount];
2809       for (k=0; k<kmax; k++) {
2810         tmp = mycols[nzcount++]/bs;
2811         if (!mask[tmp]) {
2812           mask[tmp] = 1;
2813           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2814           else masked1[dcount++] = tmp;
2815         }
2816       }
2817       rowcount++;
2818     }
2819 
2820     dlens[i]  = dcount;
2821     odlens[i] = odcount;
2822 
2823     /* zero out the mask elements we set */
2824     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2825     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2826   }
2827 
2828   /* create our matrix */
2829   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2830   ierr = MatSetType(A,type);CHKERRQ(ierr)
2831   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2832 
2833   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2834   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2835 
2836   if (!rank) {
2837     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2838     /* read in my part of the matrix numerical values  */
2839     nz = procsnz[0];
2840     vals = buf;
2841     mycols = ibuf;
2842     if (size == 1)  nz -= extra_rows;
2843     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2844     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2845 
2846     /* insert into matrix */
2847     jj      = rstart*bs;
2848     for (i=0; i<m; i++) {
2849       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2850       mycols += locrowlens[i];
2851       vals   += locrowlens[i];
2852       jj++;
2853     }
2854     /* read in other processors (except the last one) and ship out */
2855     for (i=1; i<size-1; i++) {
2856       nz   = procsnz[i];
2857       vals = buf;
2858       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2859       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2860     }
2861     /* the last proc */
2862     if (size != 1){
2863       nz   = procsnz[i] - extra_rows;
2864       vals = buf;
2865       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2866       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2867       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2868     }
2869     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2870   } else {
2871     /* receive numeric values */
2872     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2873 
2874     /* receive message of values*/
2875     vals   = buf;
2876     mycols = ibuf;
2877     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2878     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2879     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2880 
2881     /* insert into matrix */
2882     jj      = rstart*bs;
2883     for (i=0; i<m; i++) {
2884       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2885       mycols += locrowlens[i];
2886       vals   += locrowlens[i];
2887       jj++;
2888     }
2889   }
2890   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2891   ierr = PetscFree(buf);CHKERRQ(ierr);
2892   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2893   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2894   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2895   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2896   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2897   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2898 
2899   *newmat = A;
2900   PetscFunctionReturn(0);
2901 }
2902 
2903 #undef __FUNCT__
2904 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2905 /*@
2906    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2907 
2908    Input Parameters:
2909 .  mat  - the matrix
2910 .  fact - factor
2911 
2912    Collective on Mat
2913 
2914    Level: advanced
2915 
2916   Notes:
2917    This can also be set by the command line option: -mat_use_hash_table fact
2918 
2919 .keywords: matrix, hashtable, factor, HT
2920 
2921 .seealso: MatSetOption()
2922 @*/
2923 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2924 {
2925   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2926 
2927   PetscFunctionBegin;
2928   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2929   if (f) {
2930     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2931   }
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 #undef __FUNCT__
2936 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2937 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2938 {
2939   Mat_MPIBAIJ *baij;
2940 
2941   PetscFunctionBegin;
2942   baij = (Mat_MPIBAIJ*)mat->data;
2943   baij->ht_fact = fact;
2944   PetscFunctionReturn(0);
2945 }
2946 
2947 #undef __FUNCT__
2948 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2949 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2950 {
2951   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2952   PetscFunctionBegin;
2953   *Ad     = a->A;
2954   *Ao     = a->B;
2955   *colmap = a->garray;
2956   PetscFunctionReturn(0);
2957 }
2958