1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16 
17 /*  UGLY, ugly, ugly
18    When MatScalar == PetscScalar the ..._MatScalar() functions do not exist as separate
19    routines; the #defines below simply map those names onto the regular functions. Otherwise
20    ..._MatScalar() takes matrix elements already in single precision and inserts them into
21    the single precision data structures, while MatSetValues_MPIBAIJ() and MatSetValuesBlocked_MPIBAIJ()
22    convert the entries to single precision and then call the corresponding ..._MatScalar() routine.
23 */
24 #if defined(PETSC_USE_MAT_SINGLE)
25 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
30 #else
31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
32 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
34 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
36 #endif
37 
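/*
   MatGetRowMax_MPIBAIJ combines the per-row maxima of the diagonal block (a->A) and the
   off-diagonal block (a->B), keeping for each locally owned row whichever entry has the
   larger absolute value.
*/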
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
40 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
41 {
42   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       i;
45   PetscScalar    *va,*vb;
46   Vec            vtmp;
47 
48   PetscFunctionBegin;
49 
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
53   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
57   for (i=0; i<A->m; i++){
58     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNCT__
85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
89   PetscErrorCode ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
98 /*
99      Local utility routine that creates a mapping from the global column
100    number to the local number in the off-diagonal part of the local
101    storage of the matrix.  This is done in a non-scalable way since the
102    length of colmap equals the number of global block columns.
103 */
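/*
   A note on the colmap encoding (a sketch of how the lookups below use it): for the i-th
   off-diagonal block column, whose global block column number is garray[i], colmap stores
   the value i*bs+1.  The stored values are 1-based so that 0 means "this global block
   column has no off-diagonal entries on this process".  A caller that needs the local
   point column of a global point column c therefore does roughly

       col = colmap[c/bs] - 1;   col += c%bs;

   (or PetscTableFind(colmap,c/bs+1,&col); col--; col += c%bs; with PETSC_USE_CTABLE),
   which is the pattern used in MatSetValues_MPIBAIJ_MatScalar() and MatGetValues_MPIBAIJ().
*/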
104 #undef __FUNCT__
105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
107 {
108   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
109   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
110   PetscErrorCode ierr;
111   PetscInt       nbs = B->nbs,i,bs=mat->bs;
112 
113   PetscFunctionBegin;
114 #if defined (PETSC_USE_CTABLE)
115   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
116   for (i=0; i<nbs; i++){
117     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
118   }
119 #else
120   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
121   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
123   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
124 #endif
125   PetscFunctionReturn(0);
126 }
127 
128 #define CHUNKSIZE  10
129 
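/*
   MatSetValues_SeqBAIJ_A_Private() and ..._B_Private() insert a single point value into the
   diagonal (A) or off-diagonal (B) SeqBAIJ part.  Roughly: compute the block row (row/bs)
   and block column (col/bs), narrow the search window by bisection while more than 3
   candidates remain, then scan linearly.  If the block column already exists the scalar is
   added/inserted at offset bs*cidx+ridx within the bs x bs block; otherwise (unless nonew
   forbids it) the row is grown, later block columns are shifted up, and a zeroed block is
   inserted to hold the new value.
*/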
130 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
131 { \
132  \
133     brow = row/bs;  \
134     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135     rmax = aimax[brow]; nrow = ailen[brow]; \
136       bcol = col/bs; \
137       ridx = row % bs; cidx = col % bs; \
138       low = 0; high = nrow; \
139       while (high-low > 3) { \
140         t = (low+high)/2; \
141         if (rp[t] > bcol) high = t; \
142         else              low  = t; \
143       } \
144       for (_i=low; _i<high; _i++) { \
145         if (rp[_i] > bcol) break; \
146         if (rp[_i] == bcol) { \
147           bap  = ap +  bs2*_i + bs*cidx + ridx; \
148           if (addv == ADD_VALUES) *bap += value;  \
149           else                    *bap  = value;  \
150           goto a_noinsert; \
151         } \
152       } \
153       if (a->nonew == 1) goto a_noinsert; \
154       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
155       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
156       N = nrow++ - 1;  \
157       /* shift up all the later entries in this row */ \
158       for (ii=N; ii>=_i; ii--) { \
159         rp[ii+1] = rp[ii]; \
160         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
161       } \
162       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
163       rp[_i]                      = bcol;  \
164       ap[bs2*_i + bs*cidx + ridx] = value;  \
165       a_noinsert:; \
166     ailen[brow] = nrow; \
167 }
168 
169 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
170 { \
171     brow = row/bs;  \
172     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173     rmax = bimax[brow]; nrow = bilen[brow]; \
174       bcol = col/bs; \
175       ridx = row % bs; cidx = col % bs; \
176       low = 0; high = nrow; \
177       while (high-low > 3) { \
178         t = (low+high)/2; \
179         if (rp[t] > bcol) high = t; \
180         else              low  = t; \
181       } \
182       for (_i=low; _i<high; _i++) { \
183         if (rp[_i] > bcol) break; \
184         if (rp[_i] == bcol) { \
185           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186           if (addv == ADD_VALUES) *bap += value;  \
187           else                    *bap  = value;  \
188           goto b_noinsert; \
189         } \
190       } \
191       if (b->nonew == 1) goto b_noinsert; \
192       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
193       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
194       CHKMEMQ;\
195       N = nrow++ - 1;  \
196       /* shift up all the later entries in this row */ \
197       for (ii=N; ii>=_i; ii--) { \
198         rp[ii+1] = rp[ii]; \
199         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
200       } \
201       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
202       rp[_i]                      = bcol;  \
203       ap[bs2*_i + bs*cidx + ridx] = value;  \
204       b_noinsert:; \
205     bilen[brow] = nrow; \
206 }
207 
208 #if defined(PETSC_USE_MAT_SINGLE)
209 #undef __FUNCT__
210 #define __FUNCT__ "MatSetValues_MPIBAIJ"
211 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
212 {
213   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
214   PetscErrorCode ierr;
215   PetscInt       i,N = m*n;
216   MatScalar      *vsingle;
217 
218   PetscFunctionBegin;
219   if (N > b->setvalueslen) {
220     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
221     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
222     b->setvalueslen  = N;
223   }
224   vsingle = b->setvaluescopy;
225 
226   for (i=0; i<N; i++) {
227     vsingle[i] = v[i];
228   }
229   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
235 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
238   PetscErrorCode ierr;
239   PetscInt       i,N = m*n*b->bs2;
240   MatScalar      *vsingle;
241 
242   PetscFunctionBegin;
243   if (N > b->setvalueslen) {
244     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
245     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
246     b->setvalueslen  = N;
247   }
248   vsingle = b->setvaluescopy;
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
258 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
281 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
282 {
283   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
284   PetscErrorCode ierr;
285   PetscInt       i,N = m*n*b->bs2;
286   MatScalar      *vsingle;
287 
288   PetscFunctionBegin;
289   if (N > b->setvalueslen) {
290     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
291     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
292     b->setvalueslen  = N;
293   }
294   vsingle = b->setvaluescopy;
295   for (i=0; i<N; i++) {
296     vsingle[i] = v[i];
297   }
298   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 #endif
302 
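/*
   MatSetValues_MPIBAIJ_MatScalar() dispatches each incoming point entry: entries in locally
   owned rows go directly into the diagonal block (baij->A) or, via colmap, into the
   off-diagonal block (baij->B); entries in rows owned by other processes are stashed
   (unless donotstash is set) and communicated during MatAssemblyBegin()/MatAssemblyEnd().
*/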
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
305 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
306 {
307   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
308   MatScalar      value;
309   PetscTruth     roworiented = baij->roworiented;
310   PetscErrorCode ierr;
311   PetscInt       i,j,row,col;
312   PetscInt       rstart_orig=baij->rstart_bs;
313   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
314   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
315 
316   /* Some Variables required in the macro */
317   Mat            A = baij->A;
318   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
319   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
320   MatScalar      *aa=a->a;
321 
322   Mat            B = baij->B;
323   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
324   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
325   MatScalar      *ba=b->a;
326 
327   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
328   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
329   MatScalar      *ap,*bap;
330 
331   PetscFunctionBegin;
332   for (i=0; i<m; i++) {
333     if (im[i] < 0) continue;
334 #if defined(PETSC_USE_DEBUG)
335     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
336 #endif
337     if (im[i] >= rstart_orig && im[i] < rend_orig) {
338       row = im[i] - rstart_orig;
339       for (j=0; j<n; j++) {
340         if (in[j] >= cstart_orig && in[j] < cend_orig){
341           col = in[j] - cstart_orig;
342           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
343           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
344           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
345         } else if (in[j] < 0) continue;
346 #if defined(PETSC_USE_DEBUG)
347         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
348 #endif
349         else {
350           if (mat->was_assembled) {
351             if (!baij->colmap) {
352               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
353             }
354 #if defined (PETSC_USE_CTABLE)
355             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
356             col  = col - 1;
357 #else
358             col = baij->colmap[in[j]/bs] - 1;
359 #endif
360             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
361               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
362               col =  in[j];
363               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
364               B = baij->B;
365               b = (Mat_SeqBAIJ*)(B)->data;
366               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
367               ba=b->a;
368             } else col += in[j]%bs;
369           } else col = in[j];
370           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
371           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
372           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
373         }
374       }
375     } else {
376       if (!baij->donotstash) {
377         if (roworiented) {
378           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
379         } else {
380           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
381         }
382       }
383     }
384   }
385   PetscFunctionReturn(0);
386 }
387 
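/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar() inserts full bs x bs blocks.  When the values of
   a block are not contiguous in v (more than one block column in row-oriented mode, or more
   than one block row in column-oriented mode) they are first gathered into the scratch
   array baij->barray, which is what is passed on to the SeqBAIJ routines; otherwise the
   block is used directly from v.
*/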
388 #undef __FUNCT__
389 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
390 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
391 {
392   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
393   const MatScalar *value;
394   MatScalar       *barray=baij->barray;
395   PetscTruth      roworiented = baij->roworiented;
396   PetscErrorCode  ierr;
397   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
398   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
399   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
400 
401   PetscFunctionBegin;
402   if(!barray) {
403     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
404     baij->barray = barray;
405   }
406 
407   if (roworiented) {
408     stepval = (n-1)*bs;
409   } else {
410     stepval = (m-1)*bs;
411   }
412   for (i=0; i<m; i++) {
413     if (im[i] < 0) continue;
414 #if defined(PETSC_USE_DEBUG)
415     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
416 #endif
417     if (im[i] >= rstart && im[i] < rend) {
418       row = im[i] - rstart;
419       for (j=0; j<n; j++) {
420         /* If only one block is being set (n == 1 in row-oriented mode, m == 1 in column-oriented mode) no copy is required */
421         if ((roworiented) && (n == 1)) {
422           barray = (MatScalar*)v + i*bs2;
423         } else if((!roworiented) && (m == 1)) {
424           barray = (MatScalar*)v + j*bs2;
425         } else { /* Here a copy is required */
426           if (roworiented) {
427             value = v + i*(stepval+bs)*bs + j*bs;
428           } else {
429             value = v + j*(stepval+bs)*bs + i*bs;
430           }
431           for (ii=0; ii<bs; ii++,value+=stepval) {
432             for (jj=0; jj<bs; jj++) {
433               *barray++  = *value++;
434             }
435           }
436           barray -=bs2;
437         }
438 
439         if (in[j] >= cstart && in[j] < cend){
440           col  = in[j] - cstart;
441           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
442         }
443         else if (in[j] < 0) continue;
444 #if defined(PETSC_USE_DEBUG)
445         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
446 #endif
447         else {
448           if (mat->was_assembled) {
449             if (!baij->colmap) {
450               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
451             }
452 
453 #if defined(PETSC_USE_DEBUG)
454 #if defined (PETSC_USE_CTABLE)
455             { PetscInt data;
456               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
457               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
458             }
459 #else
460             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
461 #endif
462 #endif
463 #if defined (PETSC_USE_CTABLE)
464 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
465             col  = (col - 1)/bs;
466 #else
467             col = (baij->colmap[in[j]] - 1)/bs;
468 #endif
469             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
470               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
471               col =  in[j];
472             }
473           }
474           else col = in[j];
475           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
476         }
477       }
478     } else {
479       if (!baij->donotstash) {
480         if (roworiented) {
481           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
482         } else {
483           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
484         }
485       }
486     }
487   }
488   PetscFunctionReturn(0);
489 }
490 
491 #define HASH_KEY 0.6180339887
492 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
493 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
494 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
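/*
   HASH() is a multiplicative hash: multiply the key by the golden-ratio constant
   0.6180339887, keep the fractional part, and scale by the table size, i.e.
   HASH(size,key,tmp) = (PetscInt)(size * frac(key*0.6180339887)).  The keys used below are
   of the form (block row)*Nbs + (block column) + 1, so that a key of 0 never occurs and an
   empty table slot can be represented by 0.
*/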
495 #undef __FUNCT__
496 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
497 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
498 {
499   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
500   PetscTruth     roworiented = baij->roworiented;
501   PetscErrorCode ierr;
502   PetscInt       i,j,row,col;
503   PetscInt       rstart_orig=baij->rstart_bs;
504   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
505   PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
506   PetscReal      tmp;
507   MatScalar      **HD = baij->hd,value;
508 #if defined(PETSC_USE_DEBUG)
509   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
510 #endif
511 
512   PetscFunctionBegin;
513 
514   for (i=0; i<m; i++) {
515 #if defined(PETSC_USE_DEBUG)
516     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
517     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
518 #endif
519     row = im[i];
520     if (row >= rstart_orig && row < rend_orig) {
521       for (j=0; j<n; j++) {
522         col = in[j];
523         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
524         /* Look up the entry in the hash table */
525         key = (row/bs)*Nbs+(col/bs)+1;
526         h1  = HASH(size,key,tmp);
527 
528 
529         idx = h1;
530 #if defined(PETSC_USE_DEBUG)
531         insert_ct++;
532         total_ct++;
533         if (HT[idx] != key) {
534           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
535           if (idx == size) {
536             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
537             if (idx == h1) {
538               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
539             }
540           }
541         }
542 #else
543         if (HT[idx] != key) {
544           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
545           if (idx == size) {
546             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
547             if (idx == h1) {
548               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
549             }
550           }
551         }
552 #endif
553         /* A HASH table entry is found, so insert the values at the correct address */
554         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
555         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
556       }
557     } else {
558       if (!baij->donotstash) {
559         if (roworiented) {
560           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
561         } else {
562           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
563         }
564       }
565     }
566   }
567 #if defined(PETSC_USE_DEBUG)
568   baij->ht_total_ct = total_ct;
569   baij->ht_insert_ct = insert_ct;
570 #endif
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
576 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
577 {
578   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
579   PetscTruth      roworiented = baij->roworiented;
580   PetscErrorCode  ierr;
581   PetscInt        i,j,ii,jj,row,col;
582   PetscInt        rstart=baij->rstart ;
583   PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
584   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
585   PetscReal       tmp;
586   MatScalar       **HD = baij->hd,*baij_a;
587   const MatScalar *v_t,*value;
588 #if defined(PETSC_USE_DEBUG)
589   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
590 #endif
591 
592   PetscFunctionBegin;
593 
594   if (roworiented) {
595     stepval = (n-1)*bs;
596   } else {
597     stepval = (m-1)*bs;
598   }
599   for (i=0; i<m; i++) {
600 #if defined(PETSC_USE_DEBUG)
601     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
602     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
603 #endif
604     row   = im[i];
605     v_t   = v + i*bs2;
606     if (row >= rstart && row < rend) {
607       for (j=0; j<n; j++) {
608         col = in[j];
609 
610         /* Look up into the Hash Table */
611         key = row*Nbs+col+1;
612         h1  = HASH(size,key,tmp);
613 
614         idx = h1;
615 #if defined(PETSC_USE_DEBUG)
616         total_ct++;
617         insert_ct++;
618         if (HT[idx] != key) {
619           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
620           if (idx == size) {
621             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
622             if (idx == h1) {
623               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
624             }
625           }
626         }
627 #else
628         if (HT[idx] != key) {
629           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
630           if (idx == size) {
631             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
632             if (idx == h1) {
633               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
634             }
635           }
636         }
637 #endif
638         baij_a = HD[idx];
639         if (roworiented) {
640           /*value = v + i*(stepval+bs)*bs + j*bs;*/
641           /* value = v + (i*(stepval+bs)+j)*bs; */
642           value = v_t;
643           v_t  += bs;
644           if (addv == ADD_VALUES) {
645             for (ii=0; ii<bs; ii++,value+=stepval) {
646               for (jj=ii; jj<bs2; jj+=bs) {
647                 baij_a[jj]  += *value++;
648               }
649             }
650           } else {
651             for (ii=0; ii<bs; ii++,value+=stepval) {
652               for (jj=ii; jj<bs2; jj+=bs) {
653                 baij_a[jj]  = *value++;
654               }
655             }
656           }
657         } else {
658           value = v + j*(stepval+bs)*bs + i*bs;
659           if (addv == ADD_VALUES) {
660             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
661               for (jj=0; jj<bs; jj++) {
662                 baij_a[jj]  += *value++;
663               }
664             }
665           } else {
666             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
667               for (jj=0; jj<bs; jj++) {
668                 baij_a[jj]  = *value++;
669               }
670             }
671           }
672         }
673       }
674     } else {
675       if (!baij->donotstash) {
676         if (roworiented) {
677           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
678         } else {
679           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
680         }
681       }
682     }
683   }
684 #if defined(PETSC_USE_DEBUG)
685   baij->ht_total_ct = total_ct;
686   baij->ht_insert_ct = insert_ct;
687 #endif
688   PetscFunctionReturn(0);
689 }
690 
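/*
   MatGetValues_MPIBAIJ only supports querying locally owned rows.  Locally owned columns
   are read from the diagonal block; all other columns are translated through colmap/garray
   and read from the off-diagonal block, with columns that are not stored there returned
   as 0.0.
*/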
691 #undef __FUNCT__
692 #define __FUNCT__ "MatGetValues_MPIBAIJ"
693 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
694 {
695   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
696   PetscErrorCode ierr;
697   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
698   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
699 
700   PetscFunctionBegin;
701   for (i=0; i<m; i++) {
702     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
703     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
704     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
705       row = idxm[i] - bsrstart;
706       for (j=0; j<n; j++) {
707         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
708         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
709         if (idxn[j] >= bscstart && idxn[j] < bscend){
710           col = idxn[j] - bscstart;
711           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
712         } else {
713           if (!baij->colmap) {
714             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
715           }
716 #if defined (PETSC_USE_CTABLE)
717           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
718           data --;
719 #else
720           data = baij->colmap[idxn[j]/bs]-1;
721 #endif
722           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
723           else {
724             col  = data + idxn[j]%bs;
725             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
726           }
727         }
728       }
729     } else {
730       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
731     }
732   }
733   PetscFunctionReturn(0);
734 }
735 
736 #undef __FUNCT__
737 #define __FUNCT__ "MatNorm_MPIBAIJ"
738 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
739 {
740   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
741   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
742   PetscErrorCode ierr;
743   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
744   PetscReal      sum = 0.0;
745   MatScalar      *v;
746 
747   PetscFunctionBegin;
748   if (baij->size == 1) {
749     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
750   } else {
751     if (type == NORM_FROBENIUS) {
752       v = amat->a;
753       nz = amat->nz*bs2;
754       for (i=0; i<nz; i++) {
755 #if defined(PETSC_USE_COMPLEX)
756         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
757 #else
758         sum += (*v)*(*v); v++;
759 #endif
760       }
761       v = bmat->a;
762       nz = bmat->nz*bs2;
763       for (i=0; i<nz; i++) {
764 #if defined(PETSC_USE_COMPLEX)
765         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
766 #else
767         sum += (*v)*(*v); v++;
768 #endif
769       }
770       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
771       *nrm = sqrt(*nrm);
772     } else if (type == NORM_1) { /* max column sum */
773       PetscReal *tmp,*tmp2;
774       PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
775       ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
776       tmp2 = tmp + mat->N;
777       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
778       v = amat->a; jj = amat->j;
779       for (i=0; i<amat->nz; i++) {
780         for (j=0; j<bs; j++){
781           col = bs*(cstart + *jj) + j; /* column index */
782           for (row=0; row<bs; row++){
783             tmp[col] += PetscAbsScalar(*v);  v++;
784           }
785         }
786         jj++;
787       }
788       v = bmat->a; jj = bmat->j;
789       for (i=0; i<bmat->nz; i++) {
790         for (j=0; j<bs; j++){
791           col = bs*garray[*jj] + j;
792           for (row=0; row<bs; row++){
793             tmp[col] += PetscAbsScalar(*v); v++;
794           }
795         }
796         jj++;
797       }
798       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
799       *nrm = 0.0;
800       for (j=0; j<mat->N; j++) {
801         if (tmp2[j] > *nrm) *nrm = tmp2[j];
802       }
803       ierr = PetscFree(tmp);CHKERRQ(ierr);
804     } else if (type == NORM_INFINITY) { /* max row sum */
805       PetscReal *sums;
806       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
807       sum = 0.0;
808       for (j=0; j<amat->mbs; j++) {
809         for (row=0; row<bs; row++) sums[row] = 0.0;
810         v = amat->a + bs2*amat->i[j];
811         nz = amat->i[j+1]-amat->i[j];
812         for (i=0; i<nz; i++) {
813           for (col=0; col<bs; col++){
814             for (row=0; row<bs; row++){
815               sums[row] += PetscAbsScalar(*v); v++;
816             }
817           }
818         }
819         v = bmat->a + bs2*bmat->i[j];
820         nz = bmat->i[j+1]-bmat->i[j];
821         for (i=0; i<nz; i++) {
822           for (col=0; col<bs; col++){
823             for (row=0; row<bs; row++){
824               sums[row] += PetscAbsScalar(*v); v++;
825             }
826           }
827         }
828         for (row=0; row<bs; row++){
829           if (sums[row] > sum) sum = sums[row];
830         }
831       }
832       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
833       ierr = PetscFree(sums);CHKERRQ(ierr);
834     } else {
835       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
836     }
837   }
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   Creates the hash table and fills in its entries.
843   This table is created only once.
844   If new entries need to be added to the matrix
845   then the hash table has to be destroyed and
846   recreated.
847 */
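/*
   The table is filled by open addressing: each nonzero block's key is hashed with HASH()
   and, on a collision, the next free slot is found by linear probing with wraparound,
   (h1+k)%size.  HT[] holds the keys and HD[] holds pointers to the corresponding bs2-sized
   blocks of values inside the SeqBAIJ A and B parts.
*/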
848 #undef __FUNCT__
849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
850 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
851 {
852   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
853   Mat            A = baij->A,B=baij->B;
854   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
855   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
856   PetscErrorCode ierr;
857   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
858   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
859   PetscInt       *HT,key;
860   MatScalar      **HD;
861   PetscReal      tmp;
862 #if defined(PETSC_USE_VERBOSE)
863   PetscInt       ct=0,max=0;
864 #endif
865 
866   PetscFunctionBegin;
867   baij->ht_size=(PetscInt)(factor*nz);
868   size = baij->ht_size;
869 
870   if (baij->ht) {
871     PetscFunctionReturn(0);
872   }
873 
874   /* Allocate Memory for Hash Table */
875   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
876   baij->ht = (PetscInt*)(baij->hd + size);
877   HD       = baij->hd;
878   HT       = baij->ht;
879 
880 
881   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);
882 
883 
884   /* Loop Over A */
885   for (i=0; i<a->mbs; i++) {
886     for (j=ai[i]; j<ai[i+1]; j++) {
887       row = i+rstart;
888       col = aj[j]+cstart;
889 
890       key = row*Nbs + col + 1;
891       h1  = HASH(size,key,tmp);
892       for (k=0; k<size; k++){
893         if (!HT[(h1+k)%size]) {
894           HT[(h1+k)%size] = key;
895           HD[(h1+k)%size] = a->a + j*bs2;
896           break;
897 #if defined(PETSC_USE_VERBOSE)
898         } else {
899           ct++;
900 #endif
901         }
902       }
903 #if defined(PETSC_USE_VERBOSE)
904       if (k> max) max = k;
905 #endif
906     }
907   }
908   /* Loop Over B */
909   for (i=0; i<b->mbs; i++) {
910     for (j=bi[i]; j<bi[i+1]; j++) {
911       row = i+rstart;
912       col = garray[bj[j]];
913       key = row*Nbs + col + 1;
914       h1  = HASH(size,key,tmp);
915       for (k=0; k<size; k++){
916         if (!HT[(h1+k)%size]) {
917           HT[(h1+k)%size] = key;
918           HD[(h1+k)%size] = b->a + j*bs2;
919           break;
920 #if defined(PETSC_USE_VERBOSE)
921         } else {
922           ct++;
923 #endif
924         }
925       }
926 #if defined(PETSC_USE_VERBOSE)
927       if (k> max) max = k;
928 #endif
929     }
930   }
931 
932   /* Print Summary */
933 #if defined(PETSC_USE_VERBOSE)
934   for (i=0,j=0; i<size; i++) {
935     if (HT[i]) {j++;}
936   }
937   ierr = PetscVerboseInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr);
938 #endif
939   PetscFunctionReturn(0);
940 }
941 
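/*
   Assembly uses two stashes: mat->stash for point entries set with MatSetValues() and
   mat->bstash for bs x bs blocks set with MatSetValuesBlocked().  The MPI_BOR reduction
   below is used to detect the (erroneous) case in which some processes used INSERT_VALUES
   while others used ADD_VALUES.
*/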
942 #undef __FUNCT__
943 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
944 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
945 {
946   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
947   PetscErrorCode ierr;
948   PetscInt       nstash,reallocs;
949   InsertMode     addv;
950 
951   PetscFunctionBegin;
952   if (baij->donotstash) {
953     PetscFunctionReturn(0);
954   }
955 
956   /* make sure all processors are either in INSERTMODE or ADDMODE */
957   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
958   if (addv == (ADD_VALUES|INSERT_VALUES)) {
959     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
960   }
961   mat->insertmode = addv; /* in case this processor had no cache */
962 
963   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
964   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
965   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
966   ierr = PetscVerboseInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
967   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
968   ierr = PetscVerboseInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
974 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
975 {
976   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
977   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
978   PetscErrorCode ierr;
979   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
980   PetscInt       *row,*col,other_disassembled;
981   PetscTruth     r1,r2,r3;
982   MatScalar      *val;
983   InsertMode     addv = mat->insertmode;
984   PetscMPIInt    n;
985 
986   PetscFunctionBegin;
987   if (!baij->donotstash) {
988     while (1) {
989       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
990       if (!flg) break;
991 
992       for (i=0; i<n;) {
993         /* Now identify the consecutive vals belonging to the same row */
994         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995         if (j < n) ncols = j-i;
996         else       ncols = n-i;
997         /* Now assemble all these values with a single function call */
998         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
999         i = j;
1000       }
1001     }
1002     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1003     /* Now process the block-stash. Since the values are stashed column-oriented,
1004        set the roworiented flag to column oriented, and after MatSetValues()
1005        restore the original flags */
1006     r1 = baij->roworiented;
1007     r2 = a->roworiented;
1008     r3 = b->roworiented;
1009     baij->roworiented = PETSC_FALSE;
1010     a->roworiented    = PETSC_FALSE;
1011     b->roworiented    = PETSC_FALSE;
1012     while (1) {
1013       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1014       if (!flg) break;
1015 
1016       for (i=0; i<n;) {
1017         /* Now identify the consecutive vals belonging to the same row */
1018         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019         if (j < n) ncols = j-i;
1020         else       ncols = n-i;
1021         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1022         i = j;
1023       }
1024     }
1025     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1026     baij->roworiented = r1;
1027     a->roworiented    = r2;
1028     b->roworiented    = r3;
1029   }
1030 
1031   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1032   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1033 
1034   /* determine if any processor has disassembled, if so we must
1035      also disassemble ourselves, in order that we may reassemble. */
1036   /*
1037      if nonzero structure of submatrix B cannot change then we know that
1038      no processor disassembled, and thus we can skip this check
1039   */
1040   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1041     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1042     if (mat->was_assembled && !other_disassembled) {
1043       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1044     }
1045   }
1046 
1047   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1049   }
1050   b->compressedrow.use = PETSC_TRUE;
1051   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1052   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1053 
1054 #if defined(PETSC_USE_VERBOSE)
1055   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1056     ierr = PetscVerboseInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr);
1057     baij->ht_total_ct  = 0;
1058     baij->ht_insert_ct = 0;
1059   }
1060 #endif
1061   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1062     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1063     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1064     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1065   }
1066 
1067   if (baij->rowvalues) {
1068     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1069     baij->rowvalues = 0;
1070   }
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1076 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1077 {
1078   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1079   PetscErrorCode    ierr;
1080   PetscMPIInt       size = baij->size,rank = baij->rank;
1081   PetscInt          bs = mat->bs;
1082   PetscTruth        iascii,isdraw;
1083   PetscViewer       sviewer;
1084   PetscViewerFormat format;
1085 
1086   PetscFunctionBegin;
1087   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1088   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1089   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1090   if (iascii) {
1091     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1092     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1093       MatInfo info;
1094       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1095       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1096       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1097               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1098               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1099       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1100       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1101       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1102       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1103       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1104       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1105       PetscFunctionReturn(0);
1106     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1107       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1108       PetscFunctionReturn(0);
1109     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1110       PetscFunctionReturn(0);
1111     }
1112   }
1113 
1114   if (isdraw) {
1115     PetscDraw       draw;
1116     PetscTruth isnull;
1117     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1118     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1119   }
1120 
1121   if (size == 1) {
1122     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1123     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1124   } else {
1125     /* assemble the entire matrix onto first processor. */
1126     Mat         A;
1127     Mat_SeqBAIJ *Aloc;
1128     PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1129     MatScalar   *a;
1130 
1131     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1132     /* Perhaps this should be the type of mat? */
1133     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
1134     if (!rank) {
1135       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1136     } else {
1137       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1138     }
1139     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1140     ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1141     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1142 
1143     /* copy over the A part */
1144     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1145     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1146     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1147 
1148     for (i=0; i<mbs; i++) {
1149       rvals[0] = bs*(baij->rstart + i);
1150       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1151       for (j=ai[i]; j<ai[i+1]; j++) {
1152         col = (baij->cstart+aj[j])*bs;
1153         for (k=0; k<bs; k++) {
1154           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1155           col++; a += bs;
1156         }
1157       }
1158     }
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1161     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     for (i=0; i<mbs; i++) {
1163       rvals[0] = bs*(baij->rstart + i);
1164       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1165       for (j=ai[i]; j<ai[i+1]; j++) {
1166         col = baij->garray[aj[j]]*bs;
1167         for (k=0; k<bs; k++) {
1168           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1169           col++; a += bs;
1170         }
1171       }
1172     }
1173     ierr = PetscFree(rvals);CHKERRQ(ierr);
1174     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1175     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1176     /*
1177        Everyone has to call to draw the matrix since the graphics waits are
1178        synchronized across all processors that share the PetscDraw object
1179     */
1180     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1181     if (!rank) {
1182       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1183       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1184     }
1185     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1186     ierr = MatDestroy(A);CHKERRQ(ierr);
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatView_MPIBAIJ"
1193 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1194 {
1195   PetscErrorCode ierr;
1196   PetscTruth     iascii,isdraw,issocket,isbinary;
1197 
1198   PetscFunctionBegin;
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1200   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1201   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1202   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1203   if (iascii || isdraw || issocket || isbinary) {
1204     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1205   } else {
1206     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1207   }
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1213 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1214 {
1215   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1216   PetscErrorCode ierr;
1217 
1218   PetscFunctionBegin;
1219 #if defined(PETSC_USE_LOG)
1220   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
1221 #endif
1222   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1223   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1224   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1225   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1226   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1227 #if defined (PETSC_USE_CTABLE)
1228   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1229 #else
1230   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1231 #endif
1232   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1233   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1234   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1235   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1236   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1237   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1238 #if defined(PETSC_USE_MAT_SINGLE)
1239   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1240 #endif
1241   ierr = PetscFree(baij);CHKERRQ(ierr);
1242 
1243   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatMult_MPIBAIJ"
1255 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1256 {
1257   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1258   PetscErrorCode ierr;
1259   PetscInt       nt;
1260 
1261   PetscFunctionBegin;
1262   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1263   if (nt != A->n) {
1264     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1265   }
1266   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1267   if (nt != A->m) {
1268     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1269   }
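  /*
     Overlap communication and computation: start scattering the needed off-process entries
     of xx into a->lvec, multiply by the diagonal block while the messages are in flight,
     complete the scatter, and then add in the off-diagonal block applied to lvec.
  */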
1270   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1271   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1272   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1273   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1274   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1280 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1281 {
1282   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1283   PetscErrorCode ierr;
1284 
1285   PetscFunctionBegin;
1286   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1287   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1288   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1289   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1295 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1296 {
1297   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1298   PetscErrorCode ierr;
1299   PetscTruth     merged;
1300 
1301   PetscFunctionBegin;
1302   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1303   /* do nondiagonal part */
1304   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1305   if (!merged) {
1306     /* send it on its way */
1307     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1308     /* do local part */
1309     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1310     /* receive remote parts: note this assumes the values are not actually */
1311     /* inserted in yy until the next line */
1312     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1313   } else {
1314     /* do local part */
1315     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1316     /* send it on its way */
1317     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1318     /* values actually were received in the Begin() but we need to call this nop */
1319     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1320   }
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1326 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1327 {
1328   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   /* do nondiagonal part */
1333   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1334   /* send it on its way */
1335   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1336   /* do local part */
1337   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1338   /* receive remote parts: note this assumes the values are not actually */
1339   /* inserted in zz until the next line, which is true for this implementation */
1340   /* but may not always be true. */
1341   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 /*
1346   This only works correctly for square matrices where the subblock A->A is the
1347    diagonal block
1348 */
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1351 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1352 {
1353   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1354   PetscErrorCode ierr;
1355 
1356   PetscFunctionBegin;
1357   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1358   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatScale_MPIBAIJ"
1364 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1365 {
1366   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1367   PetscErrorCode ierr;
1368 
1369   PetscFunctionBegin;
1370   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1371   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
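/*
   MatGetRow_MPIBAIJ reconstructs one global row from the corresponding rows of the local A
   and B parts: the two pieces are merged into the workspace rowvalues/rowindices in order
   of increasing global column number, translating B's block columns through garray.
*/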
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1377 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1378 {
1379   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1380   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1381   PetscErrorCode ierr;
1382   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1383   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1384   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1385 
1386   PetscFunctionBegin;
1387   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1388   mat->getrowactive = PETSC_TRUE;
1389 
1390   if (!mat->rowvalues && (idx || v)) {
1391     /*
1392         allocate enough space to hold information from the longest row.
1393     */
1394     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1395     PetscInt     max = 1,mbs = mat->mbs,tmp;
1396     for (i=0; i<mbs; i++) {
1397       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1398       if (max < tmp) { max = tmp; }
1399     }
1400     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1401     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1402   }
1403 
1404   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1405   lrow = row - brstart;
1406 
1407   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1408   if (!v)   {pvA = 0; pvB = 0;}
1409   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1410   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1411   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1412   nztot = nzA + nzB;
1413 
1414   cmap  = mat->garray;
1415   if (v  || idx) {
1416     if (nztot) {
1417       /* Merge into a single row sorted by increasing global column number; the rows of A and B are assumed already sorted */
1418       PetscInt imark = -1;
1419       if (v) {
1420         *v = v_p = mat->rowvalues;
1421         for (i=0; i<nzB; i++) {
1422           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1423           else break;
1424         }
1425         imark = i;
1426         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1427         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1428       }
1429       if (idx) {
1430         *idx = idx_p = mat->rowindices;
1431         if (imark > -1) {
1432           for (i=0; i<imark; i++) {
1433             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1434           }
1435         } else {
1436           for (i=0; i<nzB; i++) {
1437             if (cmap[cworkB[i]/bs] < cstart)
1438               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1439             else break;
1440           }
1441           imark = i;
1442         }
1443         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1444         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1445       }
1446     } else {
1447       if (idx) *idx = 0;
1448       if (v)   *v   = 0;
1449     }
1450   }
1451   *nz = nztot;
1452   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1453   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1459 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1460 {
1461   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1462 
1463   PetscFunctionBegin;
1464   if (!baij->getrowactive) {
1465     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1466   }
1467   baij->getrowactive = PETSC_FALSE;
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1473 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1474 {
1475   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477 
1478   PetscFunctionBegin;
1479   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1480   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1481   PetscFunctionReturn(0);
1482 }
1483 
1484 #undef __FUNCT__
1485 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1486 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1487 {
1488   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1489   Mat            A = a->A,B = a->B;
1490   PetscErrorCode ierr;
1491   PetscReal      isend[5],irecv[5];
1492 
1493   PetscFunctionBegin;
1494   info->block_size     = (PetscReal)matin->bs;
1495   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1496   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1497   isend[3] = info->memory;  isend[4] = info->mallocs;
1498   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1499   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1500   isend[3] += info->memory;  isend[4] += info->mallocs;
1501   if (flag == MAT_LOCAL) {
1502     info->nz_used      = isend[0];
1503     info->nz_allocated = isend[1];
1504     info->nz_unneeded  = isend[2];
1505     info->memory       = isend[3];
1506     info->mallocs      = isend[4];
1507   } else if (flag == MAT_GLOBAL_MAX) {
1508     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1509     info->nz_used      = irecv[0];
1510     info->nz_allocated = irecv[1];
1511     info->nz_unneeded  = irecv[2];
1512     info->memory       = irecv[3];
1513     info->mallocs      = irecv[4];
1514   } else if (flag == MAT_GLOBAL_SUM) {
1515     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1516     info->nz_used      = irecv[0];
1517     info->nz_allocated = irecv[1];
1518     info->nz_unneeded  = irecv[2];
1519     info->memory       = irecv[3];
1520     info->mallocs      = irecv[4];
1521   } else {
1522     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1523   }
1524   info->rows_global       = (PetscReal)A->M;
1525   info->columns_global    = (PetscReal)A->N;
1526   info->rows_local        = (PetscReal)A->m;
1527   info->columns_local     = (PetscReal)A->N;
1528   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1529   info->fill_ratio_needed = 0;
1530   info->factor_mallocs    = 0;
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1536 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1537 {
1538   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1539   PetscErrorCode ierr;
1540 
1541   PetscFunctionBegin;
1542   switch (op) {
1543   case MAT_NO_NEW_NONZERO_LOCATIONS:
1544   case MAT_YES_NEW_NONZERO_LOCATIONS:
1545   case MAT_COLUMNS_UNSORTED:
1546   case MAT_COLUMNS_SORTED:
1547   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1548   case MAT_KEEP_ZEROED_ROWS:
1549   case MAT_NEW_NONZERO_LOCATION_ERR:
1550     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1551     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1552     break;
1553   case MAT_ROW_ORIENTED:
1554     a->roworiented = PETSC_TRUE;
1555     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1556     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1557     break;
1558   case MAT_ROWS_SORTED:
1559   case MAT_ROWS_UNSORTED:
1560   case MAT_YES_NEW_DIAGONALS:
1561     ierr = PetscVerboseInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1562     break;
1563   case MAT_COLUMN_ORIENTED:
1564     a->roworiented = PETSC_FALSE;
1565     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1566     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1567     break;
1568   case MAT_IGNORE_OFF_PROC_ENTRIES:
1569     a->donotstash = PETSC_TRUE;
1570     break;
1571   case MAT_NO_NEW_DIAGONALS:
1572     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1573   case MAT_USE_HASH_TABLE:
1574     a->ht_flag = PETSC_TRUE;
1575     break;
1576   case MAT_SYMMETRIC:
1577   case MAT_STRUCTURALLY_SYMMETRIC:
1578   case MAT_HERMITIAN:
1579   case MAT_SYMMETRY_ETERNAL:
1580     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1581     break;
1582   case MAT_NOT_SYMMETRIC:
1583   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1584   case MAT_NOT_HERMITIAN:
1585   case MAT_NOT_SYMMETRY_ETERNAL:
1586     break;
1587   default:
1588     SETERRQ(PETSC_ERR_SUP,"unknown option");
1589   }
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1595 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1596 {
1597   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1598   Mat_SeqBAIJ    *Aloc;
1599   Mat            B;
1600   PetscErrorCode ierr;
1601   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1602   PetscInt       bs=A->bs,mbs=baij->mbs;
1603   MatScalar      *a;
1604 
1605   PetscFunctionBegin;
1606   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1607   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1608   ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
1609   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1610   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1611 
1612   /* copy over the A part */
1613   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1614   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1615   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1616 
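  /* rvals[] holds the bs global point-row indices of local block row i; each stored block is
     then inserted into B with the roles of rows and columns exchanged, building the transpose. */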
1617   for (i=0; i<mbs; i++) {
1618     rvals[0] = bs*(baij->rstart + i);
1619     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1620     for (j=ai[i]; j<ai[i+1]; j++) {
1621       col = (baij->cstart+aj[j])*bs;
1622       for (k=0; k<bs; k++) {
1623         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1624         col++; a += bs;
1625       }
1626     }
1627   }
1628   /* copy over the B part */
1629   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1630   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1631   for (i=0; i<mbs; i++) {
1632     rvals[0] = bs*(baij->rstart + i);
1633     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1634     for (j=ai[i]; j<ai[i+1]; j++) {
1635       col = baij->garray[aj[j]]*bs;
1636       for (k=0; k<bs; k++) {
1637         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1638         col++; a += bs;
1639       }
1640     }
1641   }
1642   ierr = PetscFree(rvals);CHKERRQ(ierr);
1643   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1645 
1646   if (matout) {
1647     *matout = B;
1648   } else {
1649     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1650   }
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 #undef __FUNCT__
1655 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1656 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1657 {
1658   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1659   Mat            a = baij->A,b = baij->B;
1660   PetscErrorCode ierr;
1661   PetscInt       s1,s2,s3;
1662 
1663   PetscFunctionBegin;
1664   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1665   if (rr) {
1666     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1667     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1668     /* Overlap communication with computation. */
1669     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1670   }
1671   if (ll) {
1672     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1673     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1674     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1675   }
1676   /* scale  the diagonal block */
1677   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1678 
1679   if (rr) {
1680     /* Do a scatter end and then right scale the off-diagonal block */
1681     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1682     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1683   }
1684 
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1690 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1691 {
1692   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1693   PetscErrorCode ierr;
1694   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1695   PetscInt       i,*owners = l->rowners;
1696   PetscInt       *nprocs,j,idx,nsends,row;
1697   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1698   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1699   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
1700   MPI_Comm       comm = A->comm;
1701   MPI_Request    *send_waits,*recv_waits;
1702   MPI_Status     recv_status,*send_status;
1703 #if defined(PETSC_DEBUG)
1704   PetscTruth     found = PETSC_FALSE;
1705 #endif
1706 
1707   PetscFunctionBegin;
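  /* Rows to be zeroed may be specified by any process, so each global row index is first
     routed to the process that owns it (count per destination, then exchange the indices);
     afterwards every process zeroes the rows it owns locally. */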
1708   /*  first count number of contributors to each processor */
1709   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1710   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1711   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1712   j = 0;
1713   for (i=0; i<N; i++) {
1714     if (lastidx > (idx = rows[i])) j = 0;
1715     lastidx = idx;
1716     for (; j<size; j++) {
1717       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1718         nprocs[2*j]++;
1719         nprocs[2*j+1] = 1;
1720         owner[i] = j;
1721 #if defined(PETSC_DEBUG)
1722         found = PETSC_TRUE;
1723 #endif
1724         break;
1725       }
1726     }
1727 #if defined(PETSC_DEBUG)
1728     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1729     found = PETSC_FALSE;
1730 #endif
1731   }
1732   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1733 
1734   /* inform other processors of number of messages and max length*/
1735   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1736 
1737   /* post receives:   */
1738   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1739   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1740   for (i=0; i<nrecvs; i++) {
1741     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1742   }
1743 
1744   /* do sends:
1745      1) starts[i] gives the starting index in svalues for stuff going to
1746      the ith processor
1747   */
1748   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1749   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1750   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1751   starts[0]  = 0;
1752   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1753   for (i=0; i<N; i++) {
1754     svalues[starts[owner[i]]++] = rows[i];
1755   }
1756 
1757   starts[0] = 0;
1758   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1759   count = 0;
1760   for (i=0; i<size; i++) {
1761     if (nprocs[2*i+1]) {
1762       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1763     }
1764   }
1765   ierr = PetscFree(starts);CHKERRQ(ierr);
1766 
1767   base = owners[rank]*bs;
1768 
1769   /*  wait on receives */
1770   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1771   source = lens + nrecvs;
1772   count  = nrecvs; slen = 0;
1773   while (count) {
1774     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1775     /* unpack receives into our local space */
1776     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1777     source[imdex]  = recv_status.MPI_SOURCE;
1778     lens[imdex]    = n;
1779     slen          += n;
1780     count--;
1781   }
1782   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1783 
1784   /* move the data into the send scatter */
1785   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1786   count = 0;
1787   for (i=0; i<nrecvs; i++) {
1788     values = rvalues + i*nmax;
1789     for (j=0; j<lens[i]; j++) {
1790       lrows[count++] = values[j] - base;
1791     }
1792   }
1793   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1794   ierr = PetscFree(lens);CHKERRQ(ierr);
1795   ierr = PetscFree(owner);CHKERRQ(ierr);
1796   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1797 
1798   /* actually zap the local rows */
1799   /*
1800         Zero the required rows. If the "diagonal block" of the matrix
1801      is square and the user wishes to set the diagonal, we use separate
1802      code so that MatSetValues() is not called for each diagonal entry,
1803      which would allocate new memory repeatedly and slow things down.
1804 
1805        Contributed by: Matthew Knepley
1806   */
1807   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1808   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1809   if ((diag != 0.0) && (l->A->M == l->A->N)) {
1810     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1811   } else if (diag != 0.0) {
1812     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1813     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1814       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1815 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1816     }
1817     for (i=0; i<slen; i++) {
1818       row  = lrows[i] + rstart_bs;
1819       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1820     }
1821     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1822     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1823   } else {
1824     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1825   }
1826 
1827   ierr = PetscFree(lrows);CHKERRQ(ierr);
1828 
1829   /* wait on sends */
1830   if (nsends) {
1831     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1832     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1833     ierr = PetscFree(send_status);CHKERRQ(ierr);
1834   }
1835   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1836   ierr = PetscFree(svalues);CHKERRQ(ierr);
1837 
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 #undef __FUNCT__
1842 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1843 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1844 {
1845   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1846   MPI_Comm          comm = A->comm;
1847   static PetscTruth called = PETSC_FALSE;
1848   PetscErrorCode    ierr;
1849 
1850   PetscFunctionBegin;
1851   if (!a->rank) {
1852     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1853   }
1854   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1855   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1856   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1862 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1863 {
1864   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1865   PetscErrorCode ierr;
1866 
1867   PetscFunctionBegin;
1868   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatEqual_MPIBAIJ"
1876 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1877 {
1878   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1879   Mat            a,b,c,d;
1880   PetscTruth     flg;
1881   PetscErrorCode ierr;
1882 
1883   PetscFunctionBegin;
1884   a = matA->A; b = matA->B;
1885   c = matB->A; d = matB->B;
1886 
1887   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1888   if (flg) {
1889     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1890   }
1891   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 #undef __FUNCT__
1896 #define __FUNCT__ "MatCopy_MPIBAIJ"
1897 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1898 {
1899   PetscErrorCode ierr;
1900   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1901   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1902 
1903   PetscFunctionBegin;
1904   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1905   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1906     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1907   } else {
1908     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1909     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1910   }
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1916 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1917 {
1918   PetscErrorCode ierr;
1919 
1920   PetscFunctionBegin;
1921   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 #include "petscblaslapack.h"
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1928 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1929 {
1930   PetscErrorCode ierr;
1931   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1932   PetscBLASInt   bnz,one=1;
1933   Mat_SeqBAIJ    *x,*y;
1934 
1935   PetscFunctionBegin;
1936   if (str == SAME_NONZERO_PATTERN) {
1937     PetscScalar alpha = a;
1938     x = (Mat_SeqBAIJ *)xx->A->data;
1939     y = (Mat_SeqBAIJ *)yy->A->data;
1940     bnz = (PetscBLASInt)x->nz;
1941     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1942     x = (Mat_SeqBAIJ *)xx->B->data;
1943     y = (Mat_SeqBAIJ *)yy->B->data;
1944     bnz = (PetscBLASInt)x->nz;
1945     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1946   } else {
1947     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1948   }
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1954 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1955 {
1956   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1957   PetscErrorCode ierr;
1958 
1959   PetscFunctionBegin;
1960   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1961   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 #undef __FUNCT__
1966 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1967 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1968 {
1969   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1970   PetscErrorCode ierr;
1971 
1972   PetscFunctionBegin;
1973   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1974   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 /* -------------------------------------------------------------------*/
1979 static struct _MatOps MatOps_Values = {
1980        MatSetValues_MPIBAIJ,
1981        MatGetRow_MPIBAIJ,
1982        MatRestoreRow_MPIBAIJ,
1983        MatMult_MPIBAIJ,
1984 /* 4*/ MatMultAdd_MPIBAIJ,
1985        MatMultTranspose_MPIBAIJ,
1986        MatMultTransposeAdd_MPIBAIJ,
1987        0,
1988        0,
1989        0,
1990 /*10*/ 0,
1991        0,
1992        0,
1993        0,
1994        MatTranspose_MPIBAIJ,
1995 /*15*/ MatGetInfo_MPIBAIJ,
1996        MatEqual_MPIBAIJ,
1997        MatGetDiagonal_MPIBAIJ,
1998        MatDiagonalScale_MPIBAIJ,
1999        MatNorm_MPIBAIJ,
2000 /*20*/ MatAssemblyBegin_MPIBAIJ,
2001        MatAssemblyEnd_MPIBAIJ,
2002        0,
2003        MatSetOption_MPIBAIJ,
2004        MatZeroEntries_MPIBAIJ,
2005 /*25*/ MatZeroRows_MPIBAIJ,
2006        0,
2007        0,
2008        0,
2009        0,
2010 /*30*/ MatSetUpPreallocation_MPIBAIJ,
2011        0,
2012        0,
2013        0,
2014        0,
2015 /*35*/ MatDuplicate_MPIBAIJ,
2016        0,
2017        0,
2018        0,
2019        0,
2020 /*40*/ MatAXPY_MPIBAIJ,
2021        MatGetSubMatrices_MPIBAIJ,
2022        MatIncreaseOverlap_MPIBAIJ,
2023        MatGetValues_MPIBAIJ,
2024        MatCopy_MPIBAIJ,
2025 /*45*/ MatPrintHelp_MPIBAIJ,
2026        MatScale_MPIBAIJ,
2027        0,
2028        0,
2029        0,
2030 /*50*/ 0,
2031        0,
2032        0,
2033        0,
2034        0,
2035 /*55*/ 0,
2036        0,
2037        MatSetUnfactored_MPIBAIJ,
2038        0,
2039        MatSetValuesBlocked_MPIBAIJ,
2040 /*60*/ 0,
2041        MatDestroy_MPIBAIJ,
2042        MatView_MPIBAIJ,
2043        MatGetPetscMaps_Petsc,
2044        0,
2045 /*65*/ 0,
2046        0,
2047        0,
2048        0,
2049        0,
2050 /*70*/ MatGetRowMax_MPIBAIJ,
2051        0,
2052        0,
2053        0,
2054        0,
2055 /*75*/ 0,
2056        0,
2057        0,
2058        0,
2059        0,
2060 /*80*/ 0,
2061        0,
2062        0,
2063        0,
2064        MatLoad_MPIBAIJ,
2065 /*85*/ 0,
2066        0,
2067        0,
2068        0,
2069        0,
2070 /*90*/ 0,
2071        0,
2072        0,
2073        0,
2074        0,
2075 /*95*/ 0,
2076        0,
2077        0,
2078        0,
2079        0,
2080 /*100*/0,
2081        0,
2082        0,
2083        0,
2084        0,
2085 /*105*/0,
2086        MatRealPart_MPIBAIJ,
2087        MatImaginaryPart_MPIBAIJ};
2088 
2089 
2090 EXTERN_C_BEGIN
2091 #undef __FUNCT__
2092 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2093 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2094 {
2095   PetscFunctionBegin;
2096   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2097   *iscopy = PETSC_FALSE;
2098   PetscFunctionReturn(0);
2099 }
2100 EXTERN_C_END
2101 
2102 EXTERN_C_BEGIN
2103 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2104 EXTERN_C_END
2105 
2106 #undef __FUNCT__
2107 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2108 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2109 {
2110   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2111   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2112   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2113   const PetscInt *JJ;
2114   PetscScalar    *values;
2115   PetscErrorCode ierr;
2116 
2117   PetscFunctionBegin;
2118 #if defined(PETSC_OPT_g)
2119   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2120 #endif
2121   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2122   o_nnz = d_nnz + m;
2123 
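  /* For each local block row, count how many of its block columns fall inside the owned
     block-column range [cstart,cend) (diagonal part, d_nnz) and how many fall outside it
     (off-diagonal part, o_nnz); this assumes the column indices within each row are sorted. */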
2124   for (i=0; i<m; i++) {
2125     nnz     = I[i+1]- I[i];
2126     JJ      = J + I[i];
2127     nnz_max = PetscMax(nnz_max,nnz);
2128 #if defined(PETSC_OPT_g)
2129     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2130 #endif
2131     for (j=0; j<nnz; j++) {
2132       if (*JJ >= cstart) break;
2133       JJ++;
2134     }
2135     d = 0;
2136     for (; j<nnz; j++) {
2137       if (*JJ++ >= cend) break;
2138       d++;
2139     }
2140     d_nnz[i] = d;
2141     o_nnz[i] = nnz - d;
2142   }
2143   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2144   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2145 
2146   if (v) values = (PetscScalar*)v;
2147   else {
2148     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2149     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2150   }
2151 
2152   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2153   for (i=0; i<m; i++) {
2154     ii   = i + rstart;
2155     nnz  = I[i+1]- I[i];
2156     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values+(v ? bs*bs*I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2157   }
2158   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2161 
2162   if (!v) {
2163     ierr = PetscFree(values);CHKERRQ(ierr);
2164   }
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2170 /*@C
2171    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ
2172    (BAIJ) format, using a CSR-style description of the local nonzero block structure.
2173 
2174    Collective on MPI_Comm
2175 
2176    Input Parameters:
2177 +  B - the matrix
     .  bs - the block size
2178 .  i - the indices into j for the start of each local block row (starts with zero)
2179 .  j - the block column indices for each local block row (starts with zero); these must be sorted within each row
2180 -  v - optional values in the matrix
2181 
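   Example usage (an illustrative sketch; the block structure shown is hypothetical, and B is
   assumed to already have its sizes and the MATMPIBAIJ type set):

.vb
      PetscInt i[] = {0,2,3};
      PetscInt j[] = {0,2,1};
      ierr = MatMPIBAIJSetPreallocationCSR(B,bs,i,j,PETSC_NULL);CHKERRQ(ierr);
.ve
   Here there are two local block rows; block row 0 has blocks in global block columns 0 and 2,
   and block row 1 has a single block in global block column 1.
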
2182    Level: developer
2183 
2184 .keywords: matrix, aij, compressed row, sparse, parallel
2185 
2186 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2187 @*/
2188 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2189 {
2190   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2191 
2192   PetscFunctionBegin;
2193   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2194   if (f) {
2195     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 EXTERN_C_BEGIN
2201 #undef __FUNCT__
2202 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2203 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2204 {
2205   Mat_MPIBAIJ    *b;
2206   PetscErrorCode ierr;
2207   PetscInt       i;
2208 
2209   PetscFunctionBegin;
2210   B->preallocated = PETSC_TRUE;
2211   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2212 
2213   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2214   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2215   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2216   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2217   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2218   if (d_nnz) {
2219     for (i=0; i<B->m/bs; i++) {
2220       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2221     }
2222   }
2223   if (o_nnz) {
2224     for (i=0; i<B->m/bs; i++) {
2225       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2226     }
2227   }
2228 
2229   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2230   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2231   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2232   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2233 
2234   b = (Mat_MPIBAIJ*)B->data;
2235   B->bs  = bs;
2236   b->bs2 = bs*bs;
2237   b->mbs = B->m/bs;
2238   b->nbs = B->n/bs;
2239   b->Mbs = B->M/bs;
2240   b->Nbs = B->N/bs;
2241 
2242   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2243   b->rowners[0]    = 0;
2244   for (i=2; i<=b->size; i++) {
2245     b->rowners[i] += b->rowners[i-1];
2246   }
2247   b->rstart    = b->rowners[b->rank];
2248   b->rend      = b->rowners[b->rank+1];
2249 
2250   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2251   b->cowners[0] = 0;
2252   for (i=2; i<=b->size; i++) {
2253     b->cowners[i] += b->cowners[i-1];
2254   }
2255   b->cstart    = b->cowners[b->rank];
2256   b->cend      = b->cowners[b->rank+1];
2257 
2258   for (i=0; i<=b->size; i++) {
2259     b->rowners_bs[i] = b->rowners[i]*bs;
2260   }
2261   b->rstart_bs = b->rstart*bs;
2262   b->rend_bs   = b->rend*bs;
2263   b->cstart_bs = b->cstart*bs;
2264   b->cend_bs   = b->cend*bs;
2265 
2266   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2267   ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
2268   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2269   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2270   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2271   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2272   ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
2273   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2274   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2275   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2276 
2277   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2278 
2279   PetscFunctionReturn(0);
2280 }
2281 EXTERN_C_END
2282 
2283 EXTERN_C_BEGIN
2284 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2285 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2286 EXTERN_C_END
2287 
2288 /*MC
2289    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2290 
2291    Options Database Keys:
2292 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2293 
2294   Level: beginner
2295 
2296 .seealso: MatCreateMPIBAIJ
2297 M*/
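/*
   Illustrative sketch (not part of the original source): one common way to obtain a MATMPIBAIJ
   matrix is to let the type be chosen from the options database and then preallocate, e.g.

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);

   and run the program with -mat_type mpibaij.
*/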
2298 
2299 EXTERN_C_BEGIN
2300 #undef __FUNCT__
2301 #define __FUNCT__ "MatCreate_MPIBAIJ"
2302 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2303 {
2304   Mat_MPIBAIJ    *b;
2305   PetscErrorCode ierr;
2306   PetscTruth     flg;
2307 
2308   PetscFunctionBegin;
2309   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2310   B->data = (void*)b;
2311 
2312 
2313   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2314   B->mapping    = 0;
2315   B->factor     = 0;
2316   B->assembled  = PETSC_FALSE;
2317 
2318   B->insertmode = NOT_SET_VALUES;
2319   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2320   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2321 
2322   /* build local table of row and column ownerships */
2323   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2324   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2325   b->cowners    = b->rowners + b->size + 2;
2326   b->rowners_bs = b->cowners + b->size + 2;
2327 
2328   /* build cache for off array entries formed */
2329   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2330   b->donotstash  = PETSC_FALSE;
2331   b->colmap      = PETSC_NULL;
2332   b->garray      = PETSC_NULL;
2333   b->roworiented = PETSC_TRUE;
2334 
2335 #if defined(PETSC_USE_MAT_SINGLE)
2336   /* stuff for MatSetValues_XXX in single precision */
2337   b->setvalueslen     = 0;
2338   b->setvaluescopy    = PETSC_NULL;
2339 #endif
2340 
2341   /* stuff used in block assembly */
2342   b->barray       = 0;
2343 
2344   /* stuff used for matrix vector multiply */
2345   b->lvec         = 0;
2346   b->Mvctx        = 0;
2347 
2348   /* stuff for MatGetRow() */
2349   b->rowindices   = 0;
2350   b->rowvalues    = 0;
2351   b->getrowactive = PETSC_FALSE;
2352 
2353   /* hash table stuff */
2354   b->ht           = 0;
2355   b->hd           = 0;
2356   b->ht_size      = 0;
2357   b->ht_flag      = PETSC_FALSE;
2358   b->ht_fact      = 0;
2359   b->ht_total_ct  = 0;
2360   b->ht_insert_ct = 0;
2361 
2362   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2363   if (flg) {
2364     PetscReal fact = 1.39;
2365     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2366     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2367     if (fact <= 1.0) fact = 1.39;
2368     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2369     ierr = PetscVerboseInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
2370   }
2371   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2372                                      "MatStoreValues_MPIBAIJ",
2373                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2374   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2375                                      "MatRetrieveValues_MPIBAIJ",
2376                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2377   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2378                                      "MatGetDiagonalBlock_MPIBAIJ",
2379                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2380   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2381                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2382                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2383   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2384                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2385                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2386   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2387                                      "MatDiagonalScaleLocal_MPIBAIJ",
2388                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2389   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2390                                      "MatSetHashTableFactor_MPIBAIJ",
2391                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2392   PetscFunctionReturn(0);
2393 }
2394 EXTERN_C_END
2395 
2396 /*MC
2397    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2398 
2399    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2400    and MATMPIBAIJ otherwise.
2401 
2402    Options Database Keys:
2403 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2404 
2405   Level: beginner
2406 
2407 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2408 M*/
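/*
   Illustrative sketch (not part of the original source): selecting MATBAIJ lets the same code
   run both sequentially and in parallel,

      ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);
      ierr = MatSeqBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);

   since each preallocation routine is dispatched through a composed function, the call that
   does not match the resulting type is simply ignored.
*/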
2409 
2410 EXTERN_C_BEGIN
2411 #undef __FUNCT__
2412 #define __FUNCT__ "MatCreate_BAIJ"
2413 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2414 {
2415   PetscErrorCode ierr;
2416   PetscMPIInt    size;
2417 
2418   PetscFunctionBegin;
2419   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2420   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2421   if (size == 1) {
2422     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2423   } else {
2424     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2425   }
2426   PetscFunctionReturn(0);
2427 }
2428 EXTERN_C_END
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2432 /*@C
2433    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2434    (block compressed row).  For good matrix assembly performance
2435    the user should preallocate the matrix storage by setting the parameters
2436    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2437    performance can be increased by more than a factor of 50.
2438 
2439    Collective on Mat
2440 
2441    Input Parameters:
2442 +  B - the matrix
2443 .  bs   - size of block
2444 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2445            submatrix  (same for all local rows)
2446 .  d_nnz - array containing the number of block nonzeros in the various block rows
2447            of the diagonal portion of the local submatrix (possibly different for each block
2448            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2449 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2450            submatrix (same for all local rows).
2451 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2452            off-diagonal portion of the local submatrix (possibly different for
2453            each block row) or PETSC_NULL.
2454 
2455    If the *_nnz parameter is given then the *_nz parameter is ignored
2456 
2457    Options Database Keys:
2458 .   -mat_no_unroll - uses code that does not unroll the loops in the
2459                      block calculations (much slower)
2460 .   -mat_block_size - size of the blocks to use
2461 
2462    Notes:
2463    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2464    then it must be used on all processors that share the object for that argument.
2465 
2466    Storage Information:
2467    For a square global matrix we define each processor's diagonal portion
2468    to be its local rows and the corresponding columns (a square submatrix);
2469    each processor's off-diagonal portion encompasses the remainder of the
2470    local matrix (a rectangular submatrix).
2471 
2472    The user can specify preallocated storage for the diagonal part of
2473    the local submatrix with either d_nz or d_nnz (not both).  Set
2474    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2475    memory allocation.  Likewise, specify preallocated storage for the
2476    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2477 
2478    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2479    the figure below we depict these three local rows and all columns (0-11).
2480 
2481 .vb
2482            0 1 2 3 4 5 6 7 8 9 10 11
2483           -------------------
2484    row 3  |  o o o d d d o o o o o o
2485    row 4  |  o o o d d d o o o o o o
2486    row 5  |  o o o d d d o o o o o o
2487           -------------------
2488 .ve
2489 
2490    Thus, any entries in the d locations are stored in the d (diagonal)
2491    submatrix, and any entries in the o locations are stored in the
2492    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2493    stored simply in the MATSEQBAIJ format for compressed row storage.
2494 
2495    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2496    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2497    In general, for PDE problems in which most nonzeros are near the diagonal,
2498    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2499    or you will get TERRIBLE performance; see the users' manual chapter on
2500    matrices.
2501 
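   Example usage (an illustrative sketch; the block size and nonzero counts are hypothetical):

.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve

   Here bs=2, every block row of the diagonal portion is preallocated with 3 blocks, and every
   block row of the off-diagonal portion with 1 block.
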
2502    Level: intermediate
2503 
2504 .keywords: matrix, block, aij, compressed row, sparse, parallel
2505 
2506 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2507 @*/
2508 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2509 {
2510   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2511 
2512   PetscFunctionBegin;
2513   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2514   if (f) {
2515     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2516   }
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatCreateMPIBAIJ"
2522 /*@C
2523    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2524    (block compressed row).  For good matrix assembly performance
2525    the user should preallocate the matrix storage by setting the parameters
2526    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2527    performance can be increased by more than a factor of 50.
2528 
2529    Collective on MPI_Comm
2530 
2531    Input Parameters:
2532 +  comm - MPI communicator
2533 .  bs   - size of block
2534 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2535            This value should be the same as the local size used in creating the
2536            y vector for the matrix-vector product y = Ax.
2537 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2538            This value should be the same as the local size used in creating the
2539            x vector for the matrix-vector product y = Ax.
2540 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2541 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2542 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2543            submatrix  (same for all local rows)
2544 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2545            of the diagonal portion of the local submatrix (possibly different for each block
2546            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2547 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2548            submatrix (same for all local rows).
2549 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2550            off-diagonal portion of the local submatrix (possibly different for
2551            each block row) or PETSC_NULL.
2552 
2553    Output Parameter:
2554 .  A - the matrix
2555 
2556    Options Database Keys:
2557 .   -mat_no_unroll - uses code that does not unroll the loops in the
2558                      block calculations (much slower)
2559 .   -mat_block_size - size of the blocks to use
2560 
2561    Notes:
2562    If the *_nnz parameter is given then the *_nz parameter is ignored
2563 
2564    A nonzero block is any block that has 1 or more nonzeros in it
2565 
2566    The user MUST specify either the local or global matrix dimensions
2567    (possibly both).
2568 
2569    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2570    then it must be used on all processors that share the object for that argument.
2571 
2572    Storage Information:
2573    For a square global matrix we define each processor's diagonal portion
2574    to be its local rows and the corresponding columns (a square submatrix);
2575    each processor's off-diagonal portion encompasses the remainder of the
2576    local matrix (a rectangular submatrix).
2577 
2578    The user can specify preallocated storage for the diagonal part of
2579    the local submatrix with either d_nz or d_nnz (not both).  Set
2580    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2581    memory allocation.  Likewise, specify preallocated storage for the
2582    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2583 
2584    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2585    the figure below we depict these three local rows and all columns (0-11).
2586 
2587 .vb
2588            0 1 2 3 4 5 6 7 8 9 10 11
2589           -------------------
2590    row 3  |  o o o d d d o o o o o o
2591    row 4  |  o o o d d d o o o o o o
2592    row 5  |  o o o d d d o o o o o o
2593           -------------------
2594 .ve
2595 
2596    Thus, any entries in the d locations are stored in the d (diagonal)
2597    submatrix, and any entries in the o locations are stored in the
2598    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2599    stored simply in the MATSEQBAIJ format for compressed row storage.
2600 
2601    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2602    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2603    In general, for PDE problems in which most nonzeros are near the diagonal,
2604    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2605    or you will get TERRIBLE performance; see the users' manual chapter on
2606    matrices.
2607 
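   Example usage (an illustrative sketch; the dimensions and nonzero counts are hypothetical):

.vb
      ierr = MatCreateMPIBAIJ(comm,bs,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
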
2608    Level: intermediate
2609 
2610 .keywords: matrix, block, aij, compressed row, sparse, parallel
2611 
2612 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2613 @*/
2614 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2615 {
2616   PetscErrorCode ierr;
2617   PetscMPIInt    size;
2618 
2619   PetscFunctionBegin;
2620   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2621   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2622   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2623   if (size > 1) {
2624     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2625     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2626   } else {
2627     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2628     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2629   }
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 #undef __FUNCT__
2634 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2635 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2636 {
2637   Mat            mat;
2638   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2639   PetscErrorCode ierr;
2640   PetscInt       len=0;
2641 
2642   PetscFunctionBegin;
2643   *newmat       = 0;
2644   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2645   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
2646   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2647   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2648 
2649   mat->factor       = matin->factor;
2650   mat->preallocated = PETSC_TRUE;
2651   mat->assembled    = PETSC_TRUE;
2652   mat->insertmode   = NOT_SET_VALUES;
2653 
2654   a      = (Mat_MPIBAIJ*)mat->data;
2655   mat->bs  = matin->bs;
2656   a->bs2   = oldmat->bs2;
2657   a->mbs   = oldmat->mbs;
2658   a->nbs   = oldmat->nbs;
2659   a->Mbs   = oldmat->Mbs;
2660   a->Nbs   = oldmat->Nbs;
2661 
2662   a->rstart       = oldmat->rstart;
2663   a->rend         = oldmat->rend;
2664   a->cstart       = oldmat->cstart;
2665   a->cend         = oldmat->cend;
2666   a->size         = oldmat->size;
2667   a->rank         = oldmat->rank;
2668   a->donotstash   = oldmat->donotstash;
2669   a->roworiented  = oldmat->roworiented;
2670   a->rowindices   = 0;
2671   a->rowvalues    = 0;
2672   a->getrowactive = PETSC_FALSE;
2673   a->barray       = 0;
2674   a->rstart_bs    = oldmat->rstart_bs;
2675   a->rend_bs      = oldmat->rend_bs;
2676   a->cstart_bs    = oldmat->cstart_bs;
2677   a->cend_bs      = oldmat->cend_bs;
2678 
2679   /* hash table stuff */
2680   a->ht           = 0;
2681   a->hd           = 0;
2682   a->ht_size      = 0;
2683   a->ht_flag      = oldmat->ht_flag;
2684   a->ht_fact      = oldmat->ht_fact;
2685   a->ht_total_ct  = 0;
2686   a->ht_insert_ct = 0;
2687 
2688   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2689   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2690   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2691   if (oldmat->colmap) {
2692 #if defined (PETSC_USE_CTABLE)
2693   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2694 #else
2695   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2696   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2697   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2698 #endif
2699   } else a->colmap = 0;
2700 
2701   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2702     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2703     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2704     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2705   } else a->garray = 0;
2706 
2707   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2708   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2709   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2710   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2711 
2712   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2713   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2714   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2715   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2716   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2717   *newmat = mat;
2718 
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 #include "petscsys.h"
2723 
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatLoad_MPIBAIJ"
2726 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2727 {
2728   Mat            A;
2729   PetscErrorCode ierr;
2730   int            fd;
2731   PetscInt       i,nz,j,rstart,rend;
2732   PetscScalar    *vals,*buf;
2733   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2734   MPI_Status     status;
2735   PetscMPIInt    rank,size,maxnz;
2736   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2737   PetscInt       *locrowlens,*procsnz = 0,*browners;
2738   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2739   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2740   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2741   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2742 
2743   PetscFunctionBegin;
2744   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2745 
2746   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2747   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2748   if (!rank) {
2749     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2750     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2751     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2752   }
2753 
2754   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2755   M = header[1]; N = header[2];
2756 
2757   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2758 
2759   /*
2760      This code adds extra rows to make sure the number of rows is
2761      divisible by the blocksize
2762   */
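  /* Worked example (illustrative): with M = 10 and bs = 4, Mbs = 10/4 = 2 and
     extra_rows = 4 - 10 + 4*2 = 2, so Mbs is bumped to 3 and the matrix is padded to 12 rows;
     when M is already divisible by bs, extra_rows evaluates to bs and is reset to 0. */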
2763   Mbs        = M/bs;
2764   extra_rows = bs - M + bs*Mbs;
2765   if (extra_rows == bs) extra_rows = 0;
2766   else                  Mbs++;
2767   if (extra_rows && !rank) {
2768     ierr = PetscVerboseInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2769   }
2770 
2771   /* determine ownership of all rows */
2772   mbs        = Mbs/size + ((Mbs % size) > rank);
2773   m          = mbs*bs;
2774   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2775   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2776 
2777   /* process 0 needs enough room for process with most rows */
2778   if (!rank) {
2779     mmax = rowners[1];
2780     for (i=2; i<size; i++) {
2781       mmax = PetscMax(mmax,rowners[i]);
2782     }
2783     mmax*=bs;
2784   } else mmax = m;
2785 
2786   rowners[0] = 0;
2787   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2788   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2789   rstart = rowners[rank];
2790   rend   = rowners[rank+1];
2791 
2792   /* distribute row lengths to all processors */
2793   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2794   if (!rank) {
2795     mend = m;
2796     if (size == 1) mend = mend - extra_rows;
2797     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2798     for (j=mend; j<m; j++) locrowlens[j] = 1;
2799     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2800     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2801     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2802     for (j=0; j<m; j++) {
2803       procsnz[0] += locrowlens[j];
2804     }
2805     for (i=1; i<size; i++) {
2806       mend = browners[i+1] - browners[i];
2807       if (i == size-1) mend = mend - extra_rows;
2808       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2809       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2810       /* calculate the number of nonzeros on each processor */
2811       for (j=0; j<browners[i+1]-browners[i]; j++) {
2812         procsnz[i] += rowlengths[j];
2813       }
2814       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2815     }
2816     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2817   } else {
2818     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2819   }
2820 
2821   if (!rank) {
2822     /* determine max buffer needed and allocate it */
2823     maxnz = procsnz[0];
2824     for (i=1; i<size; i++) {
2825       maxnz = PetscMax(maxnz,procsnz[i]);
2826     }
2827     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2828 
2829     /* read in my part of the matrix column indices  */
2830     nz     = procsnz[0];
2831     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2832     mycols = ibuf;
2833     if (size == 1)  nz -= extra_rows;
2834     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
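         /* in the uniprocessor case append the column indices of the padding rows here (one diagonal entry each) */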
2835     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2836 
2837     /* read in every other process's part (except the last) and ship it off */
2838     for (i=1; i<size-1; i++) {
2839       nz   = procsnz[i];
2840       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2841       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2842     }
2843     /* read in the part for the last process */
2844     if (size != 1) {
2845       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2846       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2847       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2848       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2849     }
2850     ierr = PetscFree(cols);CHKERRQ(ierr);
2851   } else {
2852     /* determine buffer space needed for message */
2853     nz = 0;
2854     for (i=0; i<m; i++) {
2855       nz += locrowlens[i];
2856     }
2857     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2858     mycols = ibuf;
2859     /* receive the message of column indices */
2860     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2861     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2862     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Number of column indices received does not match the row lengths; file is likely corrupt");
2863   }
2864 
2865   /* loop over local block rows, counting the diagonal and off-diagonal block columns in each */
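       /* mask[] flags block columns already seen in the current block row so that each is
          counted only once; masked1[]/masked2[] record which diagonal/off-diagonal entries
          were flagged so that only those mask entries need to be reset afterwards */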
2866   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2867   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2868   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2869   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2870   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2871   rowcount = 0; nzcount = 0;
2872   for (i=0; i<mbs; i++) {
2873     dcount  = 0;
2874     odcount = 0;
2875     for (j=0; j<bs; j++) {
2876       kmax = locrowlens[rowcount];
2877       for (k=0; k<kmax; k++) {
2878         tmp = mycols[nzcount++]/bs;
2879         if (!mask[tmp]) {
2880           mask[tmp] = 1;
2881           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2882           else masked1[dcount++] = tmp;
2883         }
2884       }
2885       rowcount++;
2886     }
2887 
2888     dlens[i]  = dcount;
2889     odlens[i] = odcount;
2890 
2891     /* zero out the mask elements we set */
2892     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2893     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2894   }
2895 
2896   /* create our matrix */
2897   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2898   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2899   ierr = MatSetType(A,type);CHKERRQ(ierr);
2900   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2901 
2902   /* the column indices for each row in the file are assumed to be sorted, so let MatSetValues() take advantage of that */
2903   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2904 
2905   if (!rank) {
2906     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2907     /* read in my part of the matrix numerical values  */
2908     nz = procsnz[0];
2909     vals = buf;
2910     mycols = ibuf;
2911     if (size == 1)  nz -= extra_rows;
2912     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2913     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2914 
2915     /* insert into matrix */
2916     jj      = rstart*bs;
2917     for (i=0; i<m; i++) {
2918       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2919       mycols += locrowlens[i];
2920       vals   += locrowlens[i];
2921       jj++;
2922     }
2923     /* read in the other processes' values (except the last one's) and ship them out */
2924     for (i=1; i<size-1; i++) {
2925       nz   = procsnz[i];
2926       vals = buf;
2927       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2928       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2929     }
2930     /* the last proc */
2931     if (size != 1){
2932       nz   = procsnz[size-1] - extra_rows;
2933       vals = buf;
2934       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2935       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2936       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2937     }
2938     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2939   } else {
2940     /* receive numeric values */
2941     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2942 
2943     /* receive message of values*/
2944     vals   = buf;
2945     mycols = ibuf;
2946     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2947     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2948     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Number of values received does not match the row lengths; file is likely corrupt");
2949 
2950     /* insert into matrix */
2951     jj      = rstart*bs;
2952     for (i=0; i<m; i++) {
2953       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2954       mycols += locrowlens[i];
2955       vals   += locrowlens[i];
2956       jj++;
2957     }
2958   }
2959   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2960   ierr = PetscFree(buf);CHKERRQ(ierr);
2961   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2962   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2963   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2964   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2965   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2966   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2967 
2968   *newmat = A;
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 #undef __FUNCT__
2973 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2974 /*@
2975    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table employed when values are inserted with the hash-table option enabled.
2976 
2977    Collective on Mat
2978 
2979    Input Parameters:
2980 +  mat  - the matrix
2981 -  fact - factor determining the size of the hash table (a larger factor gives a larger table)
2982 
2983    Level: advanced
2984 
2985   Notes:
2986    This can also be set by the command line option: -mat_use_hash_table fact
2987 
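        Example (a sketch; A, comm, m and M are assumed to be declared elsewhere, the
        factor shown is illustrative, and the hash table is only used once it has been
        enabled, e.g. with MatSetOption(mat,MAT_USE_HASH_TABLE) or -mat_use_hash_table):
     .vb
        ierr = MatCreate(comm,&A);CHKERRQ(ierr);
        ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
        ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
        ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
        ierr = MatMPIBAIJSetHashTableFactor(A,1.99);CHKERRQ(ierr);
     .ve
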
2988 .keywords: matrix, hashtable, factor, HT
2989 
2990 .seealso: MatSetOption()
2991 @*/
2992 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2993 {
2994   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2995 
2996   PetscFunctionBegin;
2997   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2998   if (f) {
2999     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3000   }
3001   PetscFunctionReturn(0);
3002 }
3003 
3004 EXTERN_C_BEGIN
3005 #undef __FUNCT__
3006 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3007 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3008 {
3009   Mat_MPIBAIJ *baij;
3010 
3011   PetscFunctionBegin;
3012   baij = (Mat_MPIBAIJ*)mat->data;
3013   baij->ht_fact = fact;
3014   PetscFunctionReturn(0);
3015 }
3016 EXTERN_C_END
3017 
3018 #undef __FUNCT__
3019 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
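     /*
        MatMPIBAIJGetSeqBAIJ - Gives access to the pieces from which the parallel matrix is built:
        Ad, the sequential "diagonal" block (the rows and columns owned by this process),
        Ao, the sequential "off-diagonal" block, and colmap, the array (the matrix's garray)
        that maps the local column numbering of Ao back to the global numbering.
        Not collective; the returned matrices are references, not copies.
     */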
3020 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3021 {
3022   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3023   PetscFunctionBegin;
3024   *Ad     = a->A;
3025   *Ao     = a->B;
3026   *colmap = a->garray;
3027   PetscFunctionReturn(0);
3028 }
3029