xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 5ed2d596fd9914de76abbaed56c65f4792ae57ae)
1 /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN int DisAssemble_MPIBAIJ(Mat);
8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *);
11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
15 EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
17 
18 /*  UGLY, ugly, ugly
19    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
20    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
21    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
22    converts the entries into single precision and then calls ..._MatScalar() to put them
23    into the single precision data structures.
24 */
25 #if defined(PETSC_USE_MAT_SINGLE)
26 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31 #else
32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
33 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
34 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
35 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
36 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
37 #endif
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
41 int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
42 {
43   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
44   int          ierr,i;
45   PetscScalar  *va,*vb;
46   Vec          vtmp;
47 
48   PetscFunctionBegin;
49 
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
53   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
57   for (i=0; i<A->m; i++){
58     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71 int MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
74   int         ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNCT__
85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86 int MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
89   int         ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
98 /*
99      Local utility routine that creates a mapping from the global column
100    number to the local number in the off-diagonal part of the local
101    storage of the matrix.  This is done in a non scable way since the
102    length of colmap equals the global matrix length.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
106 static int CreateColmap_MPIBAIJ_Private(Mat mat)
107 {
108   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
109   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
110   int         nbs = B->nbs,i,bs=B->bs,ierr;
111 
112   PetscFunctionBegin;
113 #if defined (PETSC_USE_CTABLE)
114   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
115   for (i=0; i<nbs; i++){
116     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
117   }
118 #else
119   ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
120   PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
121   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
122   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
123 #endif
124   PetscFunctionReturn(0);
125 }
126 
127 #define CHUNKSIZE  10
128 
129 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
130 { \
131  \
132     brow = row/bs;  \
133     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134     rmax = aimax[brow]; nrow = ailen[brow]; \
135       bcol = col/bs; \
136       ridx = row % bs; cidx = col % bs; \
137       low = 0; high = nrow; \
138       while (high-low > 3) { \
139         t = (low+high)/2; \
140         if (rp[t] > bcol) high = t; \
141         else              low  = t; \
142       } \
143       for (_i=low; _i<high; _i++) { \
144         if (rp[_i] > bcol) break; \
145         if (rp[_i] == bcol) { \
146           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147           if (addv == ADD_VALUES) *bap += value;  \
148           else                    *bap  = value;  \
149           goto a_noinsert; \
150         } \
151       } \
152       if (a->nonew == 1) goto a_noinsert; \
153       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
154       if (nrow >= rmax) { \
155         /* there is no extra room in row, therefore enlarge */ \
156         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
157         MatScalar *new_a; \
158  \
159         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
160  \
161         /* malloc new storage space */ \
162         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
163         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
164         new_j   = (int*)(new_a + bs2*new_nz); \
165         new_i   = new_j + new_nz; \
166  \
167         /* copy over old data into new slots */ \
168         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
169         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
170         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
171         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
172         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
173         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
174         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
175         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
176                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
177         /* free up old matrix storage */ \
178         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
179         if (!a->singlemalloc) { \
180           ierr = PetscFree(a->i);CHKERRQ(ierr); \
181           ierr = PetscFree(a->j);CHKERRQ(ierr);\
182         } \
183         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
184         a->singlemalloc = PETSC_TRUE; \
185  \
186         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
187         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
188         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
189         a->maxnz += bs2*CHUNKSIZE; \
190         a->reallocs++; \
191         a->nz++; \
192       } \
193       N = nrow++ - 1;  \
194       /* shift up all the later entries in this row */ \
195       for (ii=N; ii>=_i; ii--) { \
196         rp[ii+1] = rp[ii]; \
197         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198       } \
199       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
200       rp[_i]                      = bcol;  \
201       ap[bs2*_i + bs*cidx + ridx] = value;  \
202       a_noinsert:; \
203     ailen[brow] = nrow; \
204 }
205 
206 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207 { \
208     brow = row/bs;  \
209     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210     rmax = bimax[brow]; nrow = bilen[brow]; \
211       bcol = col/bs; \
212       ridx = row % bs; cidx = col % bs; \
213       low = 0; high = nrow; \
214       while (high-low > 3) { \
215         t = (low+high)/2; \
216         if (rp[t] > bcol) high = t; \
217         else              low  = t; \
218       } \
219       for (_i=low; _i<high; _i++) { \
220         if (rp[_i] > bcol) break; \
221         if (rp[_i] == bcol) { \
222           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223           if (addv == ADD_VALUES) *bap += value;  \
224           else                    *bap  = value;  \
225           goto b_noinsert; \
226         } \
227       } \
228       if (b->nonew == 1) goto b_noinsert; \
229       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
230       if (nrow >= rmax) { \
231         /* there is no extra room in row, therefore enlarge */ \
232         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
233         MatScalar *new_a; \
234  \
235         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
236  \
237         /* malloc new storage space */ \
238         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
239         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
240         new_j   = (int*)(new_a + bs2*new_nz); \
241         new_i   = new_j + new_nz; \
242  \
243         /* copy over old data into new slots */ \
244         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
245         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
247         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
248         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
249         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
250         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
252                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253         /* free up old matrix storage */ \
254         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255         if (!b->singlemalloc) { \
256           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258         } \
259         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
260         b->singlemalloc = PETSC_TRUE; \
261  \
262         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
264         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
265         b->maxnz += bs2*CHUNKSIZE; \
266         b->reallocs++; \
267         b->nz++; \
268       } \
269       N = nrow++ - 1;  \
270       /* shift up all the later entries in this row */ \
271       for (ii=N; ii>=_i; ii--) { \
272         rp[ii+1] = rp[ii]; \
273         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274       } \
275       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276       rp[_i]                      = bcol;  \
277       ap[bs2*_i + bs*cidx + ridx] = value;  \
278       b_noinsert:; \
279     bilen[brow] = nrow; \
280 }
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ"
285 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
286 {
287   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
288   int         ierr,i,N = m*n;
289   MatScalar   *vsingle;
290 
291   PetscFunctionBegin;
292   if (N > b->setvalueslen) {
293     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
294     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
295     b->setvalueslen  = N;
296   }
297   vsingle = b->setvaluescopy;
298 
299   for (i=0; i<N; i++) {
300     vsingle[i] = v[i];
301   }
302   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
308 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
309 {
310   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
311   int         ierr,i,N = m*n*b->bs2;
312   MatScalar   *vsingle;
313 
314   PetscFunctionBegin;
315   if (N > b->setvalueslen) {
316     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
317     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
318     b->setvalueslen  = N;
319   }
320   vsingle = b->setvaluescopy;
321   for (i=0; i<N; i++) {
322     vsingle[i] = v[i];
323   }
324   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
330 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
331 {
332   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
333   int         ierr,i,N = m*n;
334   MatScalar   *vsingle;
335 
336   PetscFunctionBegin;
337   if (N > b->setvalueslen) {
338     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
339     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
340     b->setvalueslen  = N;
341   }
342   vsingle = b->setvaluescopy;
343   for (i=0; i<N; i++) {
344     vsingle[i] = v[i];
345   }
346   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
352 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
353 {
354   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
355   int         ierr,i,N = m*n*b->bs2;
356   MatScalar   *vsingle;
357 
358   PetscFunctionBegin;
359   if (N > b->setvalueslen) {
360     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
361     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
362     b->setvalueslen  = N;
363   }
364   vsingle = b->setvaluescopy;
365   for (i=0; i<N; i++) {
366     vsingle[i] = v[i];
367   }
368   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
369   PetscFunctionReturn(0);
370 }
371 #endif
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "MatSetValues_MPIBAIJ"
375 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
376 {
377   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
378   MatScalar   value;
379   PetscTruth  roworiented = baij->roworiented;
380   int         ierr,i,j,row,col;
381   int         rstart_orig=baij->rstart_bs;
382   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
383   int         cend_orig=baij->cend_bs,bs=baij->bs;
384 
385   /* Some Variables required in the macro */
386   Mat         A = baij->A;
387   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
388   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
389   MatScalar   *aa=a->a;
390 
391   Mat         B = baij->B;
392   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
393   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
394   MatScalar   *ba=b->a;
395 
396   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
397   int         low,high,t,ridx,cidx,bs2=a->bs2;
398   MatScalar   *ap,*bap;
399 
400   PetscFunctionBegin;
401   for (i=0; i<m; i++) {
402     if (im[i] < 0) continue;
403 #if defined(PETSC_USE_BOPT_g)
404     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
405 #endif
406     if (im[i] >= rstart_orig && im[i] < rend_orig) {
407       row = im[i] - rstart_orig;
408       for (j=0; j<n; j++) {
409         if (in[j] >= cstart_orig && in[j] < cend_orig){
410           col = in[j] - cstart_orig;
411           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
413           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
414         } else if (in[j] < 0) continue;
415 #if defined(PETSC_USE_BOPT_g)
416         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
417 #endif
418         else {
419           if (mat->was_assembled) {
420             if (!baij->colmap) {
421               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
422             }
423 #if defined (PETSC_USE_CTABLE)
424             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
425             col  = col - 1;
426 #else
427             col = baij->colmap[in[j]/bs] - 1;
428 #endif
429             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
430               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
431               col =  in[j];
432               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
433               B = baij->B;
434               b = (Mat_SeqBAIJ*)(B)->data;
435               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
436               ba=b->a;
437             } else col += in[j]%bs;
438           } else col = in[j];
439           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
440           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
441           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
442         }
443       }
444     } else {
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454   PetscFunctionReturn(0);
455 }
456 
457 #undef __FUNCT__
458 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
459 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
460 {
461   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
462   MatScalar   *value,*barray=baij->barray;
463   PetscTruth  roworiented = baij->roworiented;
464   int         ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
465   int         rend=baij->rend,cstart=baij->cstart,stepval;
466   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
467 
468   PetscFunctionBegin;
469   if(!barray) {
470     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
471     baij->barray = barray;
472   }
473 
474   if (roworiented) {
475     stepval = (n-1)*bs;
476   } else {
477     stepval = (m-1)*bs;
478   }
479   for (i=0; i<m; i++) {
480     if (im[i] < 0) continue;
481 #if defined(PETSC_USE_BOPT_g)
482     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
483 #endif
484     if (im[i] >= rstart && im[i] < rend) {
485       row = im[i] - rstart;
486       for (j=0; j<n; j++) {
487         /* If NumCol = 1 then a copy is not required */
488         if ((roworiented) && (n == 1)) {
489           barray = v + i*bs2;
490         } else if((!roworiented) && (m == 1)) {
491           barray = v + j*bs2;
492         } else { /* Here a copy is required */
493           if (roworiented) {
494             value = v + i*(stepval+bs)*bs + j*bs;
495           } else {
496             value = v + j*(stepval+bs)*bs + i*bs;
497           }
498           for (ii=0; ii<bs; ii++,value+=stepval) {
499             for (jj=0; jj<bs; jj++) {
500               *barray++  = *value++;
501             }
502           }
503           barray -=bs2;
504         }
505 
506         if (in[j] >= cstart && in[j] < cend){
507           col  = in[j] - cstart;
508           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509         }
510         else if (in[j] < 0) continue;
511 #if defined(PETSC_USE_BOPT_g)
512         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
513 #endif
514         else {
515           if (mat->was_assembled) {
516             if (!baij->colmap) {
517               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
518             }
519 
520 #if defined(PETSC_USE_BOPT_g)
521 #if defined (PETSC_USE_CTABLE)
522             { int data;
523               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
524               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
525             }
526 #else
527             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
528 #endif
529 #endif
530 #if defined (PETSC_USE_CTABLE)
531 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
532             col  = (col - 1)/bs;
533 #else
534             col = (baij->colmap[in[j]] - 1)/bs;
535 #endif
536             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
537               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
538               col =  in[j];
539             }
540           }
541           else col = in[j];
542           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
543         }
544       }
545     } else {
546       if (!baij->donotstash) {
547         if (roworiented) {
548           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549         } else {
550           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         }
552       }
553     }
554   }
555   PetscFunctionReturn(0);
556 }
557 
558 #define HASH_KEY 0.6180339887
559 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
560 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
561 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
562 #undef __FUNCT__
563 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
564 int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
565 {
566   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
567   PetscTruth  roworiented = baij->roworiented;
568   int         ierr,i,j,row,col;
569   int         rstart_orig=baij->rstart_bs;
570   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
571   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
572   PetscReal   tmp;
573   MatScalar   **HD = baij->hd,value;
574 #if defined(PETSC_USE_BOPT_g)
575   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
576 #endif
577 
578   PetscFunctionBegin;
579 
580   for (i=0; i<m; i++) {
581 #if defined(PETSC_USE_BOPT_g)
582     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
583     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
584 #endif
585       row = im[i];
586     if (row >= rstart_orig && row < rend_orig) {
587       for (j=0; j<n; j++) {
588         col = in[j];
589         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
590         /* Look up into the Hash Table */
591         key = (row/bs)*Nbs+(col/bs)+1;
592         h1  = HASH(size,key,tmp);
593 
594 
595         idx = h1;
596 #if defined(PETSC_USE_BOPT_g)
597         insert_ct++;
598         total_ct++;
599         if (HT[idx] != key) {
600           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
601           if (idx == size) {
602             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
603             if (idx == h1) {
604               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
605             }
606           }
607         }
608 #else
609         if (HT[idx] != key) {
610           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
611           if (idx == size) {
612             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
613             if (idx == h1) {
614               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
615             }
616           }
617         }
618 #endif
619         /* A HASH table entry is found, so insert the values at the correct address */
620         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
621         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
622       }
623     } else {
624       if (!baij->donotstash) {
625         if (roworiented) {
626           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
627         } else {
628           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
629         }
630       }
631     }
632   }
633 #if defined(PETSC_USE_BOPT_g)
634   baij->ht_total_ct = total_ct;
635   baij->ht_insert_ct = insert_ct;
636 #endif
637   PetscFunctionReturn(0);
638 }
639 
640 #undef __FUNCT__
641 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
642 int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
643 {
644   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
645   PetscTruth  roworiented = baij->roworiented;
646   int         ierr,i,j,ii,jj,row,col;
647   int         rstart=baij->rstart ;
648   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
649   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
650   PetscReal   tmp;
651   MatScalar   **HD = baij->hd,*baij_a;
652   MatScalar   *v_t,*value;
653 #if defined(PETSC_USE_BOPT_g)
654   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
655 #endif
656 
657   PetscFunctionBegin;
658 
659   if (roworiented) {
660     stepval = (n-1)*bs;
661   } else {
662     stepval = (m-1)*bs;
663   }
664   for (i=0; i<m; i++) {
665 #if defined(PETSC_USE_BOPT_g)
666     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
667     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
668 #endif
669     row   = im[i];
670     v_t   = v + i*bs2;
671     if (row >= rstart && row < rend) {
672       for (j=0; j<n; j++) {
673         col = in[j];
674 
675         /* Look up into the Hash Table */
676         key = row*Nbs+col+1;
677         h1  = HASH(size,key,tmp);
678 
679         idx = h1;
680 #if defined(PETSC_USE_BOPT_g)
681         total_ct++;
682         insert_ct++;
683        if (HT[idx] != key) {
684           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
685           if (idx == size) {
686             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
687             if (idx == h1) {
688               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
689             }
690           }
691         }
692 #else
693         if (HT[idx] != key) {
694           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
695           if (idx == size) {
696             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
697             if (idx == h1) {
698               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
699             }
700           }
701         }
702 #endif
703         baij_a = HD[idx];
704         if (roworiented) {
705           /*value = v + i*(stepval+bs)*bs + j*bs;*/
706           /* value = v + (i*(stepval+bs)+j)*bs; */
707           value = v_t;
708           v_t  += bs;
709           if (addv == ADD_VALUES) {
710             for (ii=0; ii<bs; ii++,value+=stepval) {
711               for (jj=ii; jj<bs2; jj+=bs) {
712                 baij_a[jj]  += *value++;
713               }
714             }
715           } else {
716             for (ii=0; ii<bs; ii++,value+=stepval) {
717               for (jj=ii; jj<bs2; jj+=bs) {
718                 baij_a[jj]  = *value++;
719               }
720             }
721           }
722         } else {
723           value = v + j*(stepval+bs)*bs + i*bs;
724           if (addv == ADD_VALUES) {
725             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
726               for (jj=0; jj<bs; jj++) {
727                 baij_a[jj]  += *value++;
728               }
729             }
730           } else {
731             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
732               for (jj=0; jj<bs; jj++) {
733                 baij_a[jj]  = *value++;
734               }
735             }
736           }
737         }
738       }
739     } else {
740       if (!baij->donotstash) {
741         if (roworiented) {
742           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
743         } else {
744           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
745         }
746       }
747     }
748   }
749 #if defined(PETSC_USE_BOPT_g)
750   baij->ht_total_ct = total_ct;
751   baij->ht_insert_ct = insert_ct;
752 #endif
753   PetscFunctionReturn(0);
754 }
755 
756 #undef __FUNCT__
757 #define __FUNCT__ "MatGetValues_MPIBAIJ"
758 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
759 {
760   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
761   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
762   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
763 
764   PetscFunctionBegin;
765   for (i=0; i<m; i++) {
766     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
767     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
768     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
769       row = idxm[i] - bsrstart;
770       for (j=0; j<n; j++) {
771         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
772         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
773         if (idxn[j] >= bscstart && idxn[j] < bscend){
774           col = idxn[j] - bscstart;
775           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
776         } else {
777           if (!baij->colmap) {
778             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
779           }
780 #if defined (PETSC_USE_CTABLE)
781           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
782           data --;
783 #else
784           data = baij->colmap[idxn[j]/bs]-1;
785 #endif
786           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
787           else {
788             col  = data + idxn[j]%bs;
789             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
790           }
791         }
792       }
793     } else {
794       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
795     }
796   }
797  PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "MatNorm_MPIBAIJ"
802 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
803 {
804   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
805   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
806   int        ierr,i,bs2=baij->bs2;
807   PetscReal  sum = 0.0;
808   MatScalar  *v;
809 
810   PetscFunctionBegin;
811   if (baij->size == 1) {
812     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
813   } else {
814     if (type == NORM_FROBENIUS) {
815       v = amat->a;
816       for (i=0; i<amat->nz*bs2; i++) {
817 #if defined(PETSC_USE_COMPLEX)
818         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
819 #else
820         sum += (*v)*(*v); v++;
821 #endif
822       }
823       v = bmat->a;
824       for (i=0; i<bmat->nz*bs2; i++) {
825 #if defined(PETSC_USE_COMPLEX)
826         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
827 #else
828         sum += (*v)*(*v); v++;
829 #endif
830       }
831       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
832       *nrm = sqrt(*nrm);
833     } else {
834       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
835     }
836   }
837   PetscFunctionReturn(0);
838 }
839 
840 
841 /*
842   Creates the hash table, and sets the table
843   This table is created only once.
844   If new entried need to be added to the matrix
845   then the hash table has to be destroyed and
846   recreated.
847 */
848 #undef __FUNCT__
849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
850 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
851 {
852   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
853   Mat         A = baij->A,B=baij->B;
854   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
855   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
856   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
857   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
858   int         *HT,key;
859   MatScalar   **HD;
860   PetscReal   tmp;
861 #if defined(PETSC_USE_BOPT_g)
862   int         ct=0,max=0;
863 #endif
864 
865   PetscFunctionBegin;
866   baij->ht_size=(int)(factor*nz);
867   size = baij->ht_size;
868 
869   if (baij->ht) {
870     PetscFunctionReturn(0);
871   }
872 
873   /* Allocate Memory for Hash Table */
874   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
875   baij->ht = (int*)(baij->hd + size);
876   HD       = baij->hd;
877   HT       = baij->ht;
878 
879 
880   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
881 
882 
883   /* Loop Over A */
884   for (i=0; i<a->mbs; i++) {
885     for (j=ai[i]; j<ai[i+1]; j++) {
886       row = i+rstart;
887       col = aj[j]+cstart;
888 
889       key = row*Nbs + col + 1;
890       h1  = HASH(size,key,tmp);
891       for (k=0; k<size; k++){
892         if (HT[(h1+k)%size] == 0.0) {
893           HT[(h1+k)%size] = key;
894           HD[(h1+k)%size] = a->a + j*bs2;
895           break;
896 #if defined(PETSC_USE_BOPT_g)
897         } else {
898           ct++;
899 #endif
900         }
901       }
902 #if defined(PETSC_USE_BOPT_g)
903       if (k> max) max = k;
904 #endif
905     }
906   }
907   /* Loop Over B */
908   for (i=0; i<b->mbs; i++) {
909     for (j=bi[i]; j<bi[i+1]; j++) {
910       row = i+rstart;
911       col = garray[bj[j]];
912       key = row*Nbs + col + 1;
913       h1  = HASH(size,key,tmp);
914       for (k=0; k<size; k++){
915         if (HT[(h1+k)%size] == 0.0) {
916           HT[(h1+k)%size] = key;
917           HD[(h1+k)%size] = b->a + j*bs2;
918           break;
919 #if defined(PETSC_USE_BOPT_g)
920         } else {
921           ct++;
922 #endif
923         }
924       }
925 #if defined(PETSC_USE_BOPT_g)
926       if (k> max) max = k;
927 #endif
928     }
929   }
930 
931   /* Print Summary */
932 #if defined(PETSC_USE_BOPT_g)
933   for (i=0,j=0; i<size; i++) {
934     if (HT[i]) {j++;}
935   }
936   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
937 #endif
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
943 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
944 {
945   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
946   int         ierr,nstash,reallocs;
947   InsertMode  addv;
948 
949   PetscFunctionBegin;
950   if (baij->donotstash) {
951     PetscFunctionReturn(0);
952   }
953 
954   /* make sure all processors are either in INSERTMODE or ADDMODE */
955   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
956   if (addv == (ADD_VALUES|INSERT_VALUES)) {
957     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
958   }
959   mat->insertmode = addv; /* in case this processor had no cache */
960 
961   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
962   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
963   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
964   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
965   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
966   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
967   PetscFunctionReturn(0);
968 }
969 
970 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
971 #undef __FUNCT__
972 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
973 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
974 {
975   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
976   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
977   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
978   int         *row,*col,other_disassembled;
979   PetscTruth  r1,r2,r3;
980   MatScalar   *val;
981   InsertMode  addv = mat->insertmode;
982 #if defined(PETSC_HAVE_DSCPACK)
983   PetscTruth   flag;
984 #endif
985 
986   PetscFunctionBegin;
987   if (!baij->donotstash) {
988     while (1) {
989       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
990       if (!flg) break;
991 
992       for (i=0; i<n;) {
993         /* Now identify the consecutive vals belonging to the same row */
994         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995         if (j < n) ncols = j-i;
996         else       ncols = n-i;
997         /* Now assemble all these values with a single function call */
998         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
999         i = j;
1000       }
1001     }
1002     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1003     /* Now process the block-stash. Since the values are stashed column-oriented,
1004        set the roworiented flag to column oriented, and after MatSetValues()
1005        restore the original flags */
1006     r1 = baij->roworiented;
1007     r2 = a->roworiented;
1008     r3 = b->roworiented;
1009     baij->roworiented = PETSC_FALSE;
1010     a->roworiented    = PETSC_FALSE;
1011     b->roworiented    = PETSC_FALSE;
1012     while (1) {
1013       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1014       if (!flg) break;
1015 
1016       for (i=0; i<n;) {
1017         /* Now identify the consecutive vals belonging to the same row */
1018         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019         if (j < n) ncols = j-i;
1020         else       ncols = n-i;
1021         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1022         i = j;
1023       }
1024     }
1025     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1026     baij->roworiented = r1;
1027     a->roworiented    = r2;
1028     b->roworiented    = r3;
1029   }
1030 
1031   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1032   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1033 
1034   /* determine if any processor has disassembled, if so we must
1035      also disassemble ourselfs, in order that we may reassemble. */
1036   /*
1037      if nonzero structure of submatrix B cannot change then we know that
1038      no processor disassembled thus we can skip this stuff
1039   */
1040   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1041     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1042     if (mat->was_assembled && !other_disassembled) {
1043       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1044     }
1045   }
1046 
1047   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1049   }
1050   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1051   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1052 
1053 #if defined(PETSC_USE_BOPT_g)
1054   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1055     PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1056     baij->ht_total_ct  = 0;
1057     baij->ht_insert_ct = 0;
1058   }
1059 #endif
1060   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1061     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1062     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1063     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1064   }
1065 
1066   if (baij->rowvalues) {
1067     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1068     baij->rowvalues = 0;
1069   }
1070 #if defined(PETSC_HAVE_DSCPACK)
1071   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
1072   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(mat);CHKERRQ(ierr); }
1073 #endif
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1081 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1082 {
1083   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1084   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
1085   PetscTruth        isascii,isdraw;
1086   PetscViewer       sviewer;
1087   PetscViewerFormat format;
1088 
1089   PetscFunctionBegin;
1090   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1091   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1092   if (isascii) {
1093     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1094     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
1095       MatInfo info;
1096       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1097       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1098       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1099               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1100               baij->bs,(int)info.memory);CHKERRQ(ierr);
1101       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1102       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1103       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1104       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1105       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1106       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1107       PetscFunctionReturn(0);
1108     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1109       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1110       PetscFunctionReturn(0);
1111     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1112 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
1113       ierr = MatMPIBAIJFactorInfo_DSCPACK(mat,viewer);CHKERRQ(ierr);
1114 #endif
1115       PetscFunctionReturn(0);
1116     }
1117   }
1118 
1119   if (isdraw) {
1120     PetscDraw       draw;
1121     PetscTruth isnull;
1122     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1123     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1124   }
1125 
1126   if (size == 1) {
1127     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1128     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1129   } else {
1130     /* assemble the entire matrix onto first processor. */
1131     Mat         A;
1132     Mat_SeqBAIJ *Aloc;
1133     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1134     MatScalar   *a;
1135 
1136     if (!rank) {
1137       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1138     } else {
1139       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1140     }
1141     PetscLogObjectParent(mat,A);
1142 
1143     /* copy over the A part */
1144     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1145     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1146     ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1147 
1148     for (i=0; i<mbs; i++) {
1149       rvals[0] = bs*(baij->rstart + i);
1150       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1151       for (j=ai[i]; j<ai[i+1]; j++) {
1152         col = (baij->cstart+aj[j])*bs;
1153         for (k=0; k<bs; k++) {
1154           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1155           col++; a += bs;
1156         }
1157       }
1158     }
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1161     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     for (i=0; i<mbs; i++) {
1163       rvals[0] = bs*(baij->rstart + i);
1164       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1165       for (j=ai[i]; j<ai[i+1]; j++) {
1166         col = baij->garray[aj[j]]*bs;
1167         for (k=0; k<bs; k++) {
1168           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1169           col++; a += bs;
1170         }
1171       }
1172     }
1173     ierr = PetscFree(rvals);CHKERRQ(ierr);
1174     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1175     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1176     /*
1177        Everyone has to call to draw the matrix since the graphics waits are
1178        synchronized across all processors that share the PetscDraw object
1179     */
1180     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1181     if (!rank) {
1182       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1183       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1184     }
1185     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1186     ierr = MatDestroy(A);CHKERRQ(ierr);
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatView_MPIBAIJ"
1193 int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1194 {
1195   int        ierr;
1196   PetscTruth isascii,isdraw,issocket,isbinary;
1197 
1198   PetscFunctionBegin;
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1200   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1201   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1202   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1203   if (isascii || isdraw || issocket || isbinary) {
1204     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1205   } else {
1206     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1207   }
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1213 int MatDestroy_MPIBAIJ(Mat mat)
1214 {
1215   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1216   int         ierr;
1217 
1218   PetscFunctionBegin;
1219 #if defined(PETSC_USE_LOG)
1220   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
1221 #endif
1222   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1223   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1224   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1225   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1226   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1227 #if defined (PETSC_USE_CTABLE)
1228   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1229 #else
1230   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1231 #endif
1232   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1233   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1234   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1235   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1236   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1237   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1238 #if defined(PETSC_USE_MAT_SINGLE)
1239   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1240 #endif
1241   ierr = PetscFree(baij);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatMult_MPIBAIJ"
1247 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1248 {
1249   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1250   int         ierr,nt;
1251 
1252   PetscFunctionBegin;
1253   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1254   if (nt != A->n) {
1255     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1256   }
1257   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1258   if (nt != A->m) {
1259     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1260   }
1261   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1262   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1263   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1264   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1265   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1271 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1272 {
1273   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1274   int        ierr;
1275 
1276   PetscFunctionBegin;
1277   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1278   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1279   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1280   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1286 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1287 {
1288   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1289   int         ierr;
1290 
1291   PetscFunctionBegin;
1292   /* do nondiagonal part */
1293   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1294   /* send it on its way */
1295   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1296   /* do local part */
1297   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1298   /* receive remote parts: note this assumes the values are not actually */
1299   /* inserted in yy until the next line, which is true for my implementation*/
1300   /* but is not perhaps always true. */
1301   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1307 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1308 {
1309   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1310   int         ierr;
1311 
1312   PetscFunctionBegin;
1313   /* do nondiagonal part */
1314   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1315   /* send it on its way */
1316   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1317   /* do local part */
1318   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1319   /* receive remote parts: note this assumes the values are not actually */
1320   /* inserted in yy until the next line, which is true for my implementation*/
1321   /* but is not perhaps always true. */
1322   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 /*
1327   This only works correctly for square matrices where the subblock A->A is the
1328    diagonal block
1329 */
1330 #undef __FUNCT__
1331 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1332 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1333 {
1334   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1335   int         ierr;
1336 
1337   PetscFunctionBegin;
1338   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1339   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatScale_MPIBAIJ"
1345 int MatScale_MPIBAIJ(PetscScalar *aa,Mat A)
1346 {
1347   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1348   int         ierr;
1349 
1350   PetscFunctionBegin;
1351   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1352   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1358 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1359 {
1360   Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
1361   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1362   int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1363   int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1364   int          *cmap,*idx_p,cstart = mat->cstart;
1365 
1366   PetscFunctionBegin;
1367   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1368   mat->getrowactive = PETSC_TRUE;
1369 
1370   if (!mat->rowvalues && (idx || v)) {
1371     /*
1372         allocate enough space to hold information from the longest row.
1373     */
1374     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1375     int     max = 1,mbs = mat->mbs,tmp;
1376     for (i=0; i<mbs; i++) {
1377       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1378       if (max < tmp) { max = tmp; }
1379     }
1380     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1381     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1382   }
1383 
1384   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1385   lrow = row - brstart;
1386 
1387   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1388   if (!v)   {pvA = 0; pvB = 0;}
1389   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1390   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1391   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1392   nztot = nzA + nzB;
1393 
1394   cmap  = mat->garray;
1395   if (v  || idx) {
1396     if (nztot) {
1397       /* Sort by increasing column numbers, assuming A and B already sorted */
1398       int imark = -1;
1399       if (v) {
1400         *v = v_p = mat->rowvalues;
1401         for (i=0; i<nzB; i++) {
1402           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1403           else break;
1404         }
1405         imark = i;
1406         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1407         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1408       }
1409       if (idx) {
1410         *idx = idx_p = mat->rowindices;
1411         if (imark > -1) {
1412           for (i=0; i<imark; i++) {
1413             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1414           }
1415         } else {
1416           for (i=0; i<nzB; i++) {
1417             if (cmap[cworkB[i]/bs] < cstart)
1418               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1419             else break;
1420           }
1421           imark = i;
1422         }
1423         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1424         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1425       }
1426     } else {
1427       if (idx) *idx = 0;
1428       if (v)   *v   = 0;
1429     }
1430   }
1431   *nz = nztot;
1432   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1433   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1439 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1440 {
1441   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1442 
1443   PetscFunctionBegin;
1444   if (baij->getrowactive == PETSC_FALSE) {
1445     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1446   }
1447   baij->getrowactive = PETSC_FALSE;
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1453 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1454 {
1455   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1456 
1457   PetscFunctionBegin;
1458   *bs = baij->bs;
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 #undef __FUNCT__
1463 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1464 int MatZeroEntries_MPIBAIJ(Mat A)
1465 {
1466   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1467   int         ierr;
1468 
1469   PetscFunctionBegin;
1470   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1471   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1477 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1478 {
1479   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1480   Mat         A = a->A,B = a->B;
1481   int         ierr;
1482   PetscReal   isend[5],irecv[5];
1483 
1484   PetscFunctionBegin;
1485   info->block_size     = (PetscReal)a->bs;
1486   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1487   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1488   isend[3] = info->memory;  isend[4] = info->mallocs;
1489   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1490   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1491   isend[3] += info->memory;  isend[4] += info->mallocs;
1492   if (flag == MAT_LOCAL) {
1493     info->nz_used      = isend[0];
1494     info->nz_allocated = isend[1];
1495     info->nz_unneeded  = isend[2];
1496     info->memory       = isend[3];
1497     info->mallocs      = isend[4];
1498   } else if (flag == MAT_GLOBAL_MAX) {
1499     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1500     info->nz_used      = irecv[0];
1501     info->nz_allocated = irecv[1];
1502     info->nz_unneeded  = irecv[2];
1503     info->memory       = irecv[3];
1504     info->mallocs      = irecv[4];
1505   } else if (flag == MAT_GLOBAL_SUM) {
1506     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1507     info->nz_used      = irecv[0];
1508     info->nz_allocated = irecv[1];
1509     info->nz_unneeded  = irecv[2];
1510     info->memory       = irecv[3];
1511     info->mallocs      = irecv[4];
1512   } else {
1513     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1514   }
1515   info->rows_global       = (PetscReal)A->M;
1516   info->columns_global    = (PetscReal)A->N;
1517   info->rows_local        = (PetscReal)A->m;
1518   info->columns_local     = (PetscReal)A->N;
1519   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1520   info->fill_ratio_needed = 0;
1521   info->factor_mallocs    = 0;
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1527 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1528 {
1529   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1530   int         ierr;
1531 
1532   PetscFunctionBegin;
1533   switch (op) {
1534   case MAT_NO_NEW_NONZERO_LOCATIONS:
1535   case MAT_YES_NEW_NONZERO_LOCATIONS:
1536   case MAT_COLUMNS_UNSORTED:
1537   case MAT_COLUMNS_SORTED:
1538   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1539   case MAT_KEEP_ZEROED_ROWS:
1540   case MAT_NEW_NONZERO_LOCATION_ERR:
1541     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1542     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1543     break;
1544   case MAT_ROW_ORIENTED:
1545     a->roworiented = PETSC_TRUE;
1546     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1547     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1548     break;
1549   case MAT_ROWS_SORTED:
1550   case MAT_ROWS_UNSORTED:
1551   case MAT_YES_NEW_DIAGONALS:
1552   case MAT_USE_SINGLE_PRECISION_SOLVES:
1553     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1554     break;
1555   case MAT_COLUMN_ORIENTED:
1556     a->roworiented = PETSC_FALSE;
1557     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1558     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1559     break;
1560   case MAT_IGNORE_OFF_PROC_ENTRIES:
1561     a->donotstash = PETSC_TRUE;
1562     break;
1563   case MAT_NO_NEW_DIAGONALS:
1564     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1565   case MAT_USE_HASH_TABLE:
1566     a->ht_flag = PETSC_TRUE;
1567     break;
1568   default:
1569     SETERRQ(PETSC_ERR_SUP,"unknown option");
1570   }
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1576 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1577 {
1578   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1579   Mat_SeqBAIJ *Aloc;
1580   Mat         B;
1581   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1582   int         bs=baij->bs,mbs=baij->mbs;
1583   MatScalar   *a;
1584 
1585   PetscFunctionBegin;
1586   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1587   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1588 
1589   /* copy over the A part */
1590   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1591   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1592   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1593 
1594   for (i=0; i<mbs; i++) {
1595     rvals[0] = bs*(baij->rstart + i);
1596     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1597     for (j=ai[i]; j<ai[i+1]; j++) {
1598       col = (baij->cstart+aj[j])*bs;
1599       for (k=0; k<bs; k++) {
1600         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1601         col++; a += bs;
1602       }
1603     }
1604   }
1605   /* copy over the B part */
1606   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1607   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1608   for (i=0; i<mbs; i++) {
1609     rvals[0] = bs*(baij->rstart + i);
1610     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1611     for (j=ai[i]; j<ai[i+1]; j++) {
1612       col = baij->garray[aj[j]]*bs;
1613       for (k=0; k<bs; k++) {
1614         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1615         col++; a += bs;
1616       }
1617     }
1618   }
1619   ierr = PetscFree(rvals);CHKERRQ(ierr);
1620   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1621   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1622 
1623   if (matout) {
1624     *matout = B;
1625   } else {
1626     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1627   }
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 #undef __FUNCT__
1632 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1633 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1634 {
1635   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1636   Mat         a = baij->A,b = baij->B;
1637   int         ierr,s1,s2,s3;
1638 
1639   PetscFunctionBegin;
1640   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1641   if (rr) {
1642     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1643     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1644     /* Overlap communication with computation. */
1645     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1646   }
1647   if (ll) {
1648     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1649     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1650     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1651   }
1652   /* scale  the diagonal block */
1653   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1654 
1655   if (rr) {
1656     /* Do a scatter end and then right scale the off-diagonal block */
1657     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1658     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1659   }
1660 
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1666 int MatZeroRows_MPIBAIJ(Mat A,IS is,PetscScalar *diag)
1667 {
1668   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1669   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1670   int            *procs,*nprocs,j,idx,nsends,*work,row;
1671   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1672   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1673   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1674   MPI_Comm       comm = A->comm;
1675   MPI_Request    *send_waits,*recv_waits;
1676   MPI_Status     recv_status,*send_status;
1677   IS             istmp;
1678   PetscTruth     found;
1679 
1680   PetscFunctionBegin;
1681   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1682   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1683 
1684   /*  first count number of contributors to each processor */
1685   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1686   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1687   procs = nprocs + size;
1688   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
1689   for (i=0; i<N; i++) {
1690     idx   = rows[i];
1691     found = PETSC_FALSE;
1692     for (j=0; j<size; j++) {
1693       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1694         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
1695       }
1696     }
1697     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1698   }
1699   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1700 
1701   /* inform other processors of number of messages and max length*/
1702   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
1703   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1704   nmax   = work[rank];
1705   nrecvs = work[size+rank];
1706   ierr   = PetscFree(work);CHKERRQ(ierr);
1707 
1708   /* post receives:   */
1709   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1710   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1711   for (i=0; i<nrecvs; i++) {
1712     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1713   }
1714 
1715   /* do sends:
1716      1) starts[i] gives the starting index in svalues for stuff going to
1717      the ith processor
1718   */
1719   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1720   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1721   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
1722   starts[0]  = 0;
1723   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1724   for (i=0; i<N; i++) {
1725     svalues[starts[owner[i]]++] = rows[i];
1726   }
1727   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1728 
1729   starts[0] = 0;
1730   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1731   count = 0;
1732   for (i=0; i<size; i++) {
1733     if (procs[i]) {
1734       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1735     }
1736   }
1737   ierr = PetscFree(starts);CHKERRQ(ierr);
1738 
1739   base = owners[rank]*bs;
1740 
1741   /*  wait on receives */
1742   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1743   source = lens + nrecvs;
1744   count  = nrecvs; slen = 0;
1745   while (count) {
1746     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1747     /* unpack receives into our local space */
1748     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1749     source[imdex]  = recv_status.MPI_SOURCE;
1750     lens[imdex]    = n;
1751     slen          += n;
1752     count--;
1753   }
1754   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1755 
1756   /* move the data into the send scatter */
1757   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1758   count = 0;
1759   for (i=0; i<nrecvs; i++) {
1760     values = rvalues + i*nmax;
1761     for (j=0; j<lens[i]; j++) {
1762       lrows[count++] = values[j] - base;
1763     }
1764   }
1765   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1766   ierr = PetscFree(lens);CHKERRQ(ierr);
1767   ierr = PetscFree(owner);CHKERRQ(ierr);
1768   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1769 
1770   /* actually zap the local rows */
1771   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1772   PetscLogObjectParent(A,istmp);
1773 
1774   /*
1775         Zero the required rows. If the "diagonal block" of the matrix
1776      is square and the user wishes to set the diagonal we use seperate
1777      code so that MatSetValues() is not called for each diagonal allocating
1778      new memory, thus calling lots of mallocs and slowing things down.
1779 
1780        Contributed by: Mathew Knepley
1781   */
1782   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1783   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1784   if (diag && (l->A->M == l->A->N)) {
1785     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1786   } else if (diag) {
1787     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1788     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1789       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1790 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1791     }
1792     for (i=0; i<slen; i++) {
1793       row  = lrows[i] + rstart_bs;
1794       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1795     }
1796     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1797     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1798   } else {
1799     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1800   }
1801 
1802   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1803   ierr = PetscFree(lrows);CHKERRQ(ierr);
1804 
1805   /* wait on sends */
1806   if (nsends) {
1807     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1808     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1809     ierr = PetscFree(send_status);CHKERRQ(ierr);
1810   }
1811   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1812   ierr = PetscFree(svalues);CHKERRQ(ierr);
1813 
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1819 int MatPrintHelp_MPIBAIJ(Mat A)
1820 {
1821   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1822   MPI_Comm    comm = A->comm;
1823   static int  called = 0;
1824   int         ierr;
1825 
1826   PetscFunctionBegin;
1827   if (!a->rank) {
1828     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1829   }
1830   if (called) {PetscFunctionReturn(0);} else called = 1;
1831   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1832   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1838 int MatSetUnfactored_MPIBAIJ(Mat A)
1839 {
1840   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1841   int         ierr;
1842 
1843   PetscFunctionBegin;
1844   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatEqual_MPIBAIJ"
1852 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1853 {
1854   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1855   Mat         a,b,c,d;
1856   PetscTruth  flg;
1857   int         ierr;
1858 
1859   PetscFunctionBegin;
1860   ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr);
1861   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1862   a = matA->A; b = matA->B;
1863   c = matB->A; d = matB->B;
1864 
1865   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1866   if (flg == PETSC_TRUE) {
1867     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1868   }
1869   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1876 int MatSetUpPreallocation_MPIBAIJ(Mat A)
1877 {
1878   int        ierr;
1879 
1880   PetscFunctionBegin;
1881   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 /* -------------------------------------------------------------------*/
1886 static struct _MatOps MatOps_Values = {
1887   MatSetValues_MPIBAIJ,
1888   MatGetRow_MPIBAIJ,
1889   MatRestoreRow_MPIBAIJ,
1890   MatMult_MPIBAIJ,
1891   MatMultAdd_MPIBAIJ,
1892   MatMultTranspose_MPIBAIJ,
1893   MatMultTransposeAdd_MPIBAIJ,
1894   0,
1895   0,
1896   0,
1897   0,
1898   0,
1899   0,
1900   0,
1901   MatTranspose_MPIBAIJ,
1902   MatGetInfo_MPIBAIJ,
1903   MatEqual_MPIBAIJ,
1904   MatGetDiagonal_MPIBAIJ,
1905   MatDiagonalScale_MPIBAIJ,
1906   MatNorm_MPIBAIJ,
1907   MatAssemblyBegin_MPIBAIJ,
1908   MatAssemblyEnd_MPIBAIJ,
1909   0,
1910   MatSetOption_MPIBAIJ,
1911   MatZeroEntries_MPIBAIJ,
1912   MatZeroRows_MPIBAIJ,
1913   0,
1914   0,
1915   0,
1916   0,
1917   MatSetUpPreallocation_MPIBAIJ,
1918   0,
1919   0,
1920   0,
1921   0,
1922   MatDuplicate_MPIBAIJ,
1923   0,
1924   0,
1925   0,
1926   0,
1927   0,
1928   MatGetSubMatrices_MPIBAIJ,
1929   MatIncreaseOverlap_MPIBAIJ,
1930   MatGetValues_MPIBAIJ,
1931   0,
1932   MatPrintHelp_MPIBAIJ,
1933   MatScale_MPIBAIJ,
1934   0,
1935   0,
1936   0,
1937   MatGetBlockSize_MPIBAIJ,
1938   0,
1939   0,
1940   0,
1941   0,
1942   0,
1943   0,
1944   MatSetUnfactored_MPIBAIJ,
1945   0,
1946   MatSetValuesBlocked_MPIBAIJ,
1947   0,
1948   MatDestroy_MPIBAIJ,
1949   MatView_MPIBAIJ,
1950   MatGetPetscMaps_Petsc,
1951   0,
1952   0,
1953   0,
1954   0,
1955   0,
1956   0,
1957   MatGetRowMax_MPIBAIJ};
1958 
1959 
1960 EXTERN_C_BEGIN
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1963 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1964 {
1965   PetscFunctionBegin;
1966   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1967   *iscopy = PETSC_FALSE;
1968   PetscFunctionReturn(0);
1969 }
1970 EXTERN_C_END
1971 
1972 EXTERN_C_BEGIN
1973 #undef __FUNCT__
1974 #define __FUNCT__ "MatCreate_MPIBAIJ"
1975 int MatCreate_MPIBAIJ(Mat B)
1976 {
1977   Mat_MPIBAIJ  *b;
1978   int          ierr;
1979   PetscTruth   flg;
1980 
1981   PetscFunctionBegin;
1982 
1983   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
1984   B->data = (void*)b;
1985 
1986   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
1987   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1988   B->mapping    = 0;
1989   B->factor     = 0;
1990   B->assembled  = PETSC_FALSE;
1991 
1992   B->insertmode = NOT_SET_VALUES;
1993   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1994   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1995 
1996   /* build local table of row and column ownerships */
1997   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1998   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
1999   b->cowners    = b->rowners + b->size + 2;
2000   b->rowners_bs = b->cowners + b->size + 2;
2001 
2002   /* build cache for off array entries formed */
2003   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2004   b->donotstash  = PETSC_FALSE;
2005   b->colmap      = PETSC_NULL;
2006   b->garray      = PETSC_NULL;
2007   b->roworiented = PETSC_TRUE;
2008 
2009 #if defined(PETSC_USE_MAT_SINGLE)
2010   /* stuff for MatSetValues_XXX in single precision */
2011   b->setvalueslen     = 0;
2012   b->setvaluescopy    = PETSC_NULL;
2013 #endif
2014 
2015   /* stuff used in block assembly */
2016   b->barray       = 0;
2017 
2018   /* stuff used for matrix vector multiply */
2019   b->lvec         = 0;
2020   b->Mvctx        = 0;
2021 
2022   /* stuff for MatGetRow() */
2023   b->rowindices   = 0;
2024   b->rowvalues    = 0;
2025   b->getrowactive = PETSC_FALSE;
2026 
2027   /* hash table stuff */
2028   b->ht           = 0;
2029   b->hd           = 0;
2030   b->ht_size      = 0;
2031   b->ht_flag      = PETSC_FALSE;
2032   b->ht_fact      = 0;
2033   b->ht_total_ct  = 0;
2034   b->ht_insert_ct = 0;
2035 
2036   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2037   if (flg) {
2038     PetscReal fact = 1.39;
2039     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2040     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2041     if (fact <= 1.0) fact = 1.39;
2042     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2043     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2044   }
2045   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2046                                      "MatStoreValues_MPIBAIJ",
2047                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2048   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2049                                      "MatRetrieveValues_MPIBAIJ",
2050                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2051   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2052                                      "MatGetDiagonalBlock_MPIBAIJ",
2053                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2054   PetscFunctionReturn(0);
2055 }
2056 EXTERN_C_END
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2060 /*@C
2061    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2062    (block compressed row).  For good matrix assembly performance
2063    the user should preallocate the matrix storage by setting the parameters
2064    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2065    performance can be increased by more than a factor of 50.
2066 
2067    Collective on Mat
2068 
2069    Input Parameters:
2070 +  A - the matrix
2071 .  bs   - size of blockk
2072 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2073            submatrix  (same for all local rows)
2074 .  d_nnz - array containing the number of block nonzeros in the various block rows
2075            of the in diagonal portion of the local (possibly different for each block
2076            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2077 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2078            submatrix (same for all local rows).
2079 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2080            off-diagonal portion of the local submatrix (possibly different for
2081            each block row) or PETSC_NULL.
2082 
2083    Output Parameter:
2084 
2085 
2086    Options Database Keys:
2087 .   -mat_no_unroll - uses code that does not unroll the loops in the
2088                      block calculations (much slower)
2089 .   -mat_block_size - size of the blocks to use
2090 
2091    Notes:
2092    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2093    than it must be used on all processors that share the object for that argument.
2094 
2095    Storage Information:
2096    For a square global matrix we define each processor's diagonal portion
2097    to be its local rows and the corresponding columns (a square submatrix);
2098    each processor's off-diagonal portion encompasses the remainder of the
2099    local matrix (a rectangular submatrix).
2100 
2101    The user can specify preallocated storage for the diagonal part of
2102    the local submatrix with either d_nz or d_nnz (not both).  Set
2103    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2104    memory allocation.  Likewise, specify preallocated storage for the
2105    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2106 
2107    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2108    the figure below we depict these three local rows and all columns (0-11).
2109 
2110 .vb
2111            0 1 2 3 4 5 6 7 8 9 10 11
2112           -------------------
2113    row 3  |  o o o d d d o o o o o o
2114    row 4  |  o o o d d d o o o o o o
2115    row 5  |  o o o d d d o o o o o o
2116           -------------------
2117 .ve
2118 
2119    Thus, any entries in the d locations are stored in the d (diagonal)
2120    submatrix, and any entries in the o locations are stored in the
2121    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2122    stored simply in the MATSEQBAIJ format for compressed row storage.
2123 
2124    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2125    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2126    In general, for PDE problems in which most nonzeros are near the diagonal,
2127    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2128    or you will get TERRIBLE performance; see the users' manual chapter on
2129    matrices.
2130 
2131    Level: intermediate
2132 
2133 .keywords: matrix, block, aij, compressed row, sparse, parallel
2134 
2135 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2136 @*/
2137 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2138 {
2139   Mat_MPIBAIJ  *b;
2140   int          ierr,i;
2141   PetscTruth   flg2;
2142 
2143   PetscFunctionBegin;
2144   ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr);
2145   if (!flg2) PetscFunctionReturn(0);
2146 
2147   B->preallocated = PETSC_TRUE;
2148   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2149 
2150   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2151   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2152   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2153   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2154   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2155   if (d_nnz) {
2156   for (i=0; i<B->m/bs; i++) {
2157       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2158     }
2159   }
2160   if (o_nnz) {
2161     for (i=0; i<B->m/bs; i++) {
2162       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2163     }
2164   }
2165 
2166   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2167   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2168   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2169   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2170 
2171   b = (Mat_MPIBAIJ*)B->data;
2172   b->bs  = bs;
2173   b->bs2 = bs*bs;
2174   b->mbs = B->m/bs;
2175   b->nbs = B->n/bs;
2176   b->Mbs = B->M/bs;
2177   b->Nbs = B->N/bs;
2178 
2179   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2180   b->rowners[0]    = 0;
2181   for (i=2; i<=b->size; i++) {
2182     b->rowners[i] += b->rowners[i-1];
2183   }
2184   b->rstart    = b->rowners[b->rank];
2185   b->rend      = b->rowners[b->rank+1];
2186 
2187   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2188   b->cowners[0] = 0;
2189   for (i=2; i<=b->size; i++) {
2190     b->cowners[i] += b->cowners[i-1];
2191   }
2192   b->cstart    = b->cowners[b->rank];
2193   b->cend      = b->cowners[b->rank+1];
2194 
2195   for (i=0; i<=b->size; i++) {
2196     b->rowners_bs[i] = b->rowners[i]*bs;
2197   }
2198   b->rstart_bs = b->rstart*bs;
2199   b->rend_bs   = b->rend*bs;
2200   b->cstart_bs = b->cstart*bs;
2201   b->cend_bs   = b->cend*bs;
2202 
2203   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2204   PetscLogObjectParent(B,b->A);
2205   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2206   PetscLogObjectParent(B,b->B);
2207   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2208 
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatCreateMPIBAIJ"
2214 /*@C
2215    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2216    (block compressed row).  For good matrix assembly performance
2217    the user should preallocate the matrix storage by setting the parameters
2218    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2219    performance can be increased by more than a factor of 50.
2220 
2221    Collective on MPI_Comm
2222 
2223    Input Parameters:
2224 +  comm - MPI communicator
2225 .  bs   - size of blockk
2226 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2227            This value should be the same as the local size used in creating the
2228            y vector for the matrix-vector product y = Ax.
2229 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2230            This value should be the same as the local size used in creating the
2231            x vector for the matrix-vector product y = Ax.
2232 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2233 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2234 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2235            submatrix  (same for all local rows)
2236 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2237            of the in diagonal portion of the local (possibly different for each block
2238            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2239 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2240            submatrix (same for all local rows).
2241 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2242            off-diagonal portion of the local submatrix (possibly different for
2243            each block row) or PETSC_NULL.
2244 
2245    Output Parameter:
2246 .  A - the matrix
2247 
2248    Options Database Keys:
2249 .   -mat_no_unroll - uses code that does not unroll the loops in the
2250                      block calculations (much slower)
2251 .   -mat_block_size - size of the blocks to use
2252 
2253    Notes:
2254    A nonzero block is any block that as 1 or more nonzeros in it
2255 
2256    The user MUST specify either the local or global matrix dimensions
2257    (possibly both).
2258 
2259    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2260    than it must be used on all processors that share the object for that argument.
2261 
2262    Storage Information:
2263    For a square global matrix we define each processor's diagonal portion
2264    to be its local rows and the corresponding columns (a square submatrix);
2265    each processor's off-diagonal portion encompasses the remainder of the
2266    local matrix (a rectangular submatrix).
2267 
2268    The user can specify preallocated storage for the diagonal part of
2269    the local submatrix with either d_nz or d_nnz (not both).  Set
2270    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2271    memory allocation.  Likewise, specify preallocated storage for the
2272    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2273 
2274    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2275    the figure below we depict these three local rows and all columns (0-11).
2276 
2277 .vb
2278            0 1 2 3 4 5 6 7 8 9 10 11
2279           -------------------
2280    row 3  |  o o o d d d o o o o o o
2281    row 4  |  o o o d d d o o o o o o
2282    row 5  |  o o o d d d o o o o o o
2283           -------------------
2284 .ve
2285 
2286    Thus, any entries in the d locations are stored in the d (diagonal)
2287    submatrix, and any entries in the o locations are stored in the
2288    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2289    stored simply in the MATSEQBAIJ format for compressed row storage.
2290 
2291    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2292    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2293    In general, for PDE problems in which most nonzeros are near the diagonal,
2294    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2295    or you will get TERRIBLE performance; see the users' manual chapter on
2296    matrices.
2297 
2298    Level: intermediate
2299 
2300 .keywords: matrix, block, aij, compressed row, sparse, parallel
2301 
2302 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2303 @*/
2304 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2305 {
2306   int ierr,size;
2307 
2308   PetscFunctionBegin;
2309   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2310   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2311   if (size > 1) {
2312     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2313     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2314   } else {
2315     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2316     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2317   }
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #undef __FUNCT__
2322 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2323 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2324 {
2325   Mat         mat;
2326   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2327   int         ierr,len=0;
2328 
2329   PetscFunctionBegin;
2330   *newmat       = 0;
2331   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2332   ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
2333   mat->preallocated = PETSC_TRUE;
2334   mat->assembled    = PETSC_TRUE;
2335   a      = (Mat_MPIBAIJ*)mat->data;
2336   a->bs  = oldmat->bs;
2337   a->bs2 = oldmat->bs2;
2338   a->mbs = oldmat->mbs;
2339   a->nbs = oldmat->nbs;
2340   a->Mbs = oldmat->Mbs;
2341   a->Nbs = oldmat->Nbs;
2342 
2343   a->rstart       = oldmat->rstart;
2344   a->rend         = oldmat->rend;
2345   a->cstart       = oldmat->cstart;
2346   a->cend         = oldmat->cend;
2347   a->size         = oldmat->size;
2348   a->rank         = oldmat->rank;
2349   a->donotstash   = oldmat->donotstash;
2350   a->roworiented  = oldmat->roworiented;
2351   a->rowindices   = 0;
2352   a->rowvalues    = 0;
2353   a->getrowactive = PETSC_FALSE;
2354   a->barray       = 0;
2355   a->rstart_bs    = oldmat->rstart_bs;
2356   a->rend_bs      = oldmat->rend_bs;
2357   a->cstart_bs    = oldmat->cstart_bs;
2358   a->cend_bs      = oldmat->cend_bs;
2359 
2360   /* hash table stuff */
2361   a->ht           = 0;
2362   a->hd           = 0;
2363   a->ht_size      = 0;
2364   a->ht_flag      = oldmat->ht_flag;
2365   a->ht_fact      = oldmat->ht_fact;
2366   a->ht_total_ct  = 0;
2367   a->ht_insert_ct = 0;
2368 
2369   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2370   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2371   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2372   if (oldmat->colmap) {
2373 #if defined (PETSC_USE_CTABLE)
2374   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2375 #else
2376   ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2377   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2378   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2379 #endif
2380   } else a->colmap = 0;
2381   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2382     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2383     PetscLogObjectMemory(mat,len*sizeof(int));
2384     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2385   } else a->garray = 0;
2386 
2387   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2388   PetscLogObjectParent(mat,a->lvec);
2389   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2390 
2391   PetscLogObjectParent(mat,a->Mvctx);
2392   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2393   PetscLogObjectParent(mat,a->A);
2394   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2395   PetscLogObjectParent(mat,a->B);
2396   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2397   *newmat = mat;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #include "petscsys.h"
2402 
2403 EXTERN_C_BEGIN
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatLoad_MPIBAIJ"
2406 int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2407 {
2408   Mat          A;
2409   int          i,nz,ierr,j,rstart,rend,fd;
2410   PetscScalar  *vals,*buf;
2411   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2412   MPI_Status   status;
2413   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2414   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2415   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2416   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2417   int          dcount,kmax,k,nzcount,tmp;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2421 
2422   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2423   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2424   if (!rank) {
2425     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2426     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2427     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2428     if (header[3] < 0) {
2429       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2430     }
2431   }
2432 
2433   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2434   M = header[1]; N = header[2];
2435 
2436   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2437 
2438   /*
2439      This code adds extra rows to make sure the number of rows is
2440      divisible by the blocksize
2441   */
2442   Mbs        = M/bs;
2443   extra_rows = bs - M + bs*(Mbs);
2444   if (extra_rows == bs) extra_rows = 0;
2445   else                  Mbs++;
2446   if (extra_rows &&!rank) {
2447     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2448   }
2449 
2450   /* determine ownership of all rows */
2451   mbs        = Mbs/size + ((Mbs % size) > rank);
2452   m          = mbs*bs;
2453   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2454   browners   = rowners + size + 1;
2455   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2456   rowners[0] = 0;
2457   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2458   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2459   rstart = rowners[rank];
2460   rend   = rowners[rank+1];
2461 
2462   /* distribute row lengths to all processors */
2463   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2464   if (!rank) {
2465     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2466     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2467     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2468     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2469     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2470     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2471     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2472   } else {
2473     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2474   }
2475 
2476   if (!rank) {
2477     /* calculate the number of nonzeros on each processor */
2478     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2479     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2480     for (i=0; i<size; i++) {
2481       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2482         procsnz[i] += rowlengths[j];
2483       }
2484     }
2485     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2486 
2487     /* determine max buffer needed and allocate it */
2488     maxnz = 0;
2489     for (i=0; i<size; i++) {
2490       maxnz = PetscMax(maxnz,procsnz[i]);
2491     }
2492     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2493 
2494     /* read in my part of the matrix column indices  */
2495     nz     = procsnz[0];
2496     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2497     mycols = ibuf;
2498     if (size == 1)  nz -= extra_rows;
2499     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2500     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2501 
2502     /* read in every ones (except the last) and ship off */
2503     for (i=1; i<size-1; i++) {
2504       nz   = procsnz[i];
2505       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2506       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2507     }
2508     /* read in the stuff for the last proc */
2509     if (size != 1) {
2510       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2511       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2512       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2513       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2514     }
2515     ierr = PetscFree(cols);CHKERRQ(ierr);
2516   } else {
2517     /* determine buffer space needed for message */
2518     nz = 0;
2519     for (i=0; i<m; i++) {
2520       nz += locrowlens[i];
2521     }
2522     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2523     mycols = ibuf;
2524     /* receive message of column indices*/
2525     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2526     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2527     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2528   }
2529 
2530   /* loop over local rows, determining number of off diagonal entries */
2531   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2532   odlens   = dlens + (rend-rstart);
2533   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2534   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2535   masked1  = mask    + Mbs;
2536   masked2  = masked1 + Mbs;
2537   rowcount = 0; nzcount = 0;
2538   for (i=0; i<mbs; i++) {
2539     dcount  = 0;
2540     odcount = 0;
2541     for (j=0; j<bs; j++) {
2542       kmax = locrowlens[rowcount];
2543       for (k=0; k<kmax; k++) {
2544         tmp = mycols[nzcount++]/bs;
2545         if (!mask[tmp]) {
2546           mask[tmp] = 1;
2547           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2548           else masked1[dcount++] = tmp;
2549         }
2550       }
2551       rowcount++;
2552     }
2553 
2554     dlens[i]  = dcount;
2555     odlens[i] = odcount;
2556 
2557     /* zero out the mask elements we set */
2558     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2559     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2560   }
2561 
2562   /* create our matrix */
2563   ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2564   A = *newmat;
2565   MatSetOption(A,MAT_COLUMNS_SORTED);
2566 
2567   if (!rank) {
2568     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2569     /* read in my part of the matrix numerical values  */
2570     nz = procsnz[0];
2571     vals = buf;
2572     mycols = ibuf;
2573     if (size == 1)  nz -= extra_rows;
2574     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2575     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2576 
2577     /* insert into matrix */
2578     jj      = rstart*bs;
2579     for (i=0; i<m; i++) {
2580       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2581       mycols += locrowlens[i];
2582       vals   += locrowlens[i];
2583       jj++;
2584     }
2585     /* read in other processors (except the last one) and ship out */
2586     for (i=1; i<size-1; i++) {
2587       nz   = procsnz[i];
2588       vals = buf;
2589       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2590       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2591     }
2592     /* the last proc */
2593     if (size != 1){
2594       nz   = procsnz[i] - extra_rows;
2595       vals = buf;
2596       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2597       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2598       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2599     }
2600     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2601   } else {
2602     /* receive numeric values */
2603     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2604 
2605     /* receive message of values*/
2606     vals   = buf;
2607     mycols = ibuf;
2608     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2609     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2610     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2611 
2612     /* insert into matrix */
2613     jj      = rstart*bs;
2614     for (i=0; i<m; i++) {
2615       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2616       mycols += locrowlens[i];
2617       vals   += locrowlens[i];
2618       jj++;
2619     }
2620   }
2621   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2622   ierr = PetscFree(buf);CHKERRQ(ierr);
2623   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2624   ierr = PetscFree(rowners);CHKERRQ(ierr);
2625   ierr = PetscFree(dlens);CHKERRQ(ierr);
2626   ierr = PetscFree(mask);CHKERRQ(ierr);
2627   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2628   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2629   PetscFunctionReturn(0);
2630 }
2631 EXTERN_C_END
2632 
2633 #undef __FUNCT__
2634 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2635 /*@
2636    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2637 
2638    Input Parameters:
2639 .  mat  - the matrix
2640 .  fact - factor
2641 
2642    Collective on Mat
2643 
2644    Level: advanced
2645 
2646   Notes:
2647    This can also be set by the command line option: -mat_use_hash_table fact
2648 
2649 .keywords: matrix, hashtable, factor, HT
2650 
2651 .seealso: MatSetOption()
2652 @*/
2653 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2654 {
2655   Mat_MPIBAIJ *baij;
2656   int         ierr;
2657   PetscTruth  flg;
2658 
2659   PetscFunctionBegin;
2660   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2661   ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr);
2662   if (!flg) {
2663     SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
2664   }
2665   baij = (Mat_MPIBAIJ*)mat->data;
2666   baij->ht_fact = fact;
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 #undef __FUNCT__
2671 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2672 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2673 {
2674   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2675   PetscFunctionBegin;
2676   *Ad     = a->A;
2677   *Ao     = a->B;
2678   *colmap = a->garray;
2679   PetscFunctionReturn(0);
2680 }
2681