xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 1d2e40055fe3bf783df36eeeccf2d7d5fcfab0fd)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16 
17 /*  UGLY, ugly, ugly
18    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
19    not exist; the macros below simply alias the ..._MatScalar() names to the usual routines.
20    Otherwise the ..._MatScalar() routines take matrix elements already in single precision
21    (MatScalar) and insert them directly, while MatSetValuesBlocked_MPIBAIJ() first converts
22    the PetscScalar entries to single precision and then calls ..._MatScalar() to store them.
23 */
24 #if defined(PETSC_USE_MAT_SINGLE)
25 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
30 #else
31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
32 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
34 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
36 #endif
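/*
   Illustrative sketch (not part of the build): with PETSC_USE_MAT_SINGLE defined,
   PetscScalar is double precision while MatScalar is single precision, so the
   MatSetValues*_MPIBAIJ() wrappers below must narrow the caller's values before
   handing them to the ..._MatScalar() routines:

       PetscScalar v[2] = {1.0,2.0};        <- what the caller passes in
       MatScalar   vs[2];                   <- what the storage format holds
       for (i=0; i<2; i++) vs[i] = v[i];    <- narrowing copy, as in the wrappers
*/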
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
40 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
41 {
42   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       i;
45   PetscScalar    *va,*vb;
46   Vec            vtmp;
47 
48   PetscFunctionBegin;
49 
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
53   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
57   for (i=0; i<A->rmap.n; i++){
58     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNCT__
85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
89   PetscErrorCode ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
98 /*
99      Local utility routine that creates a mapping from the global column
100    number to the local number in the off-diagonal part of the local
101    storage of the matrix.  This is done in a non-scalable way since the
102    length of colmap equals the global matrix length.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
107 {
108   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
109   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
110   PetscErrorCode ierr;
111   PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;
112 
113   PetscFunctionBegin;
114 #if defined (PETSC_USE_CTABLE)
115   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
116   for (i=0; i<nbs; i++){
117     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
118   }
119 #else
120   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
121   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
123   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
124 #endif
125   PetscFunctionReturn(0);
126 }
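/*
   Usage sketch (mirrors the lookups later in this file): colmap maps a global
   block column gbcol to bs*(local block column) + 1, so a stored value of zero
   means "not in the off-diagonal part" and every lookup subtracts one:

       #if defined(PETSC_USE_CTABLE)
         ierr = PetscTableFind(baij->colmap,gbcol+1,&col);CHKERRQ(ierr);
         col  = col - 1;
       #else
         col = baij->colmap[gbcol] - 1;      (col < 0 means not present)
       #endif
*/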
127 
128 #define CHUNKSIZE  10
129 
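/*
   The two macros below are hand-inlined versions of MatSetValues_SeqBAIJ() for a
   single point entry: find the block row (row/bs), binary-search the sorted list
   of block columns rp[] for col/bs, and either update the existing bs x bs block
   in place or shift the later blocks up and insert a new one (reallocating with
   MatSeqXAIJReallocateAIJ() if the row is full).  The _A_ variant updates the
   diagonal part baij->A, the _B_ variant the off-diagonal part baij->B.
*/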
130 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
131 { \
132  \
133     brow = row/bs;  \
134     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135     rmax = aimax[brow]; nrow = ailen[brow]; \
136       bcol = col/bs; \
137       ridx = row % bs; cidx = col % bs; \
138       low = 0; high = nrow; \
139       while (high-low > 3) { \
140         t = (low+high)/2; \
141         if (rp[t] > bcol) high = t; \
142         else              low  = t; \
143       } \
144       for (_i=low; _i<high; _i++) { \
145         if (rp[_i] > bcol) break; \
146         if (rp[_i] == bcol) { \
147           bap  = ap +  bs2*_i + bs*cidx + ridx; \
148           if (addv == ADD_VALUES) *bap += value;  \
149           else                    *bap  = value;  \
150           goto a_noinsert; \
151         } \
152       } \
153       if (a->nonew == 1) goto a_noinsert; \
154       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
155       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
156       N = nrow++ - 1;  \
157       /* shift up all the later entries in this row */ \
158       for (ii=N; ii>=_i; ii--) { \
159         rp[ii+1] = rp[ii]; \
160         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
161       } \
162       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
163       rp[_i]                      = bcol;  \
164       ap[bs2*_i + bs*cidx + ridx] = value;  \
165       a_noinsert:; \
166     ailen[brow] = nrow; \
167 }
168 
169 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
170 { \
171     brow = row/bs;  \
172     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173     rmax = bimax[brow]; nrow = bilen[brow]; \
174       bcol = col/bs; \
175       ridx = row % bs; cidx = col % bs; \
176       low = 0; high = nrow; \
177       while (high-low > 3) { \
178         t = (low+high)/2; \
179         if (rp[t] > bcol) high = t; \
180         else              low  = t; \
181       } \
182       for (_i=low; _i<high; _i++) { \
183         if (rp[_i] > bcol) break; \
184         if (rp[_i] == bcol) { \
185           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186           if (addv == ADD_VALUES) *bap += value;  \
187           else                    *bap  = value;  \
188           goto b_noinsert; \
189         } \
190       } \
191       if (b->nonew == 1) goto b_noinsert; \
192       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
193       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
194       CHKMEMQ;\
195       N = nrow++ - 1;  \
196       /* shift up all the later entries in this row */ \
197       for (ii=N; ii>=_i; ii--) { \
198         rp[ii+1] = rp[ii]; \
199         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
200       } \
201       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
202       rp[_i]                      = bcol;  \
203       ap[bs2*_i + bs*cidx + ridx] = value;  \
204       b_noinsert:; \
205     bilen[brow] = nrow; \
206 }
207 
208 #if defined(PETSC_USE_MAT_SINGLE)
209 #undef __FUNCT__
210 #define __FUNCT__ "MatSetValues_MPIBAIJ"
211 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
212 {
213   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
214   PetscErrorCode ierr;
215   PetscInt       i,N = m*n;
216   MatScalar      *vsingle;
217 
218   PetscFunctionBegin;
219   if (N > b->setvalueslen) {
220     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
221     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
222     b->setvalueslen  = N;
223   }
224   vsingle = b->setvaluescopy;
225 
226   for (i=0; i<N; i++) {
227     vsingle[i] = v[i];
228   }
229   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
235 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
238   PetscErrorCode ierr;
239   PetscInt       i,N = m*n*b->bs2;
240   MatScalar      *vsingle;
241 
242   PetscFunctionBegin;
243   if (N > b->setvalueslen) {
244     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
245     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
246     b->setvalueslen  = N;
247   }
248   vsingle = b->setvaluescopy;
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
258 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
281 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
282 {
283   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
284   PetscErrorCode ierr;
285   PetscInt       i,N = m*n*b->bs2;
286   MatScalar      *vsingle;
287 
288   PetscFunctionBegin;
289   if (N > b->setvalueslen) {
290     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
291     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
292     b->setvalueslen  = N;
293   }
294   vsingle = b->setvaluescopy;
295   for (i=0; i<N; i++) {
296     vsingle[i] = v[i];
297   }
298   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 #endif
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
305 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
306 {
307   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
308   MatScalar      value;
309   PetscTruth     roworiented = baij->roworiented;
310   PetscErrorCode ierr;
311   PetscInt       i,j,row,col;
312   PetscInt       rstart_orig=mat->rmap.rstart;
313   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
314   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
315 
316   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
317   Mat            A = baij->A;
318   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
319   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
320   MatScalar      *aa=a->a;
321 
322   Mat            B = baij->B;
323   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
324   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
325   MatScalar      *ba=b->a;
326 
327   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
328   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
329   MatScalar      *ap,*bap;
330 
331   PetscFunctionBegin;
332   for (i=0; i<m; i++) {
333     if (im[i] < 0) continue;
334 #if defined(PETSC_USE_DEBUG)
335     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
336 #endif
337     if (im[i] >= rstart_orig && im[i] < rend_orig) {
338       row = im[i] - rstart_orig;
339       for (j=0; j<n; j++) {
340         if (in[j] >= cstart_orig && in[j] < cend_orig){
341           col = in[j] - cstart_orig;
342           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
343           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
344           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
345         } else if (in[j] < 0) continue;
346 #if defined(PETSC_USE_DEBUG)
347         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
348 #endif
349         else {
350           if (mat->was_assembled) {
351             if (!baij->colmap) {
352               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
353             }
354 #if defined (PETSC_USE_CTABLE)
355             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
356             col  = col - 1;
357 #else
358             col = baij->colmap[in[j]/bs] - 1;
359 #endif
360             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
361               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
362               col =  in[j];
363               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
364               B = baij->B;
365               b = (Mat_SeqBAIJ*)(B)->data;
366               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
367               ba=b->a;
368             } else col += in[j]%bs;
369           } else col = in[j];
370           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
371           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
372           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
373         }
374       }
375     } else {
376       if (!baij->donotstash) {
377         if (roworiented) {
378           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
379         } else {
380           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
381         }
382       }
383     }
384   }
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
390 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
391 {
392   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
393   const MatScalar *value;
394   MatScalar       *barray=baij->barray;
395   PetscTruth      roworiented = baij->roworiented;
396   PetscErrorCode  ierr;
397   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
398   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
399   PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
400 
401   PetscFunctionBegin;
402   if (!barray) {
403     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
404     baij->barray = barray;
405   }
406 
407   if (roworiented) {
408     stepval = (n-1)*bs;
409   } else {
410     stepval = (m-1)*bs;
411   }
412   for (i=0; i<m; i++) {
413     if (im[i] < 0) continue;
414 #if defined(PETSC_USE_DEBUG)
415     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
416 #endif
417     if (im[i] >= rstart && im[i] < rend) {
418       row = im[i] - rstart;
419       for (j=0; j<n; j++) {
420         /* If n == 1 (row-oriented) or m == 1 (column-oriented), v already holds the block contiguously, so no copy is required */
421         if ((roworiented) && (n == 1)) {
422           barray = (MatScalar*)v + i*bs2;
423         } else if((!roworiented) && (m == 1)) {
424           barray = (MatScalar*)v + j*bs2;
425         } else { /* Here a copy is required */
426           if (roworiented) {
427             value = v + i*(stepval+bs)*bs + j*bs;
428           } else {
429             value = v + j*(stepval+bs)*bs + i*bs;
430           }
431           for (ii=0; ii<bs; ii++,value+=stepval) {
432             for (jj=0; jj<bs; jj++) {
433               *barray++  = *value++;
434             }
435           }
436           barray -=bs2;
437         }
438 
439         if (in[j] >= cstart && in[j] < cend){
440           col  = in[j] - cstart;
441           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
442         }
443         else if (in[j] < 0) continue;
444 #if defined(PETSC_USE_DEBUG)
445         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
446 #endif
447         else {
448           if (mat->was_assembled) {
449             if (!baij->colmap) {
450               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
451             }
452 
453 #if defined(PETSC_USE_DEBUG)
454 #if defined (PETSC_USE_CTABLE)
455             { PetscInt data;
456               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
457               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
458             }
459 #else
460             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
461 #endif
462 #endif
463 #if defined (PETSC_USE_CTABLE)
464 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
465             col  = (col - 1)/bs;
466 #else
467             col = (baij->colmap[in[j]] - 1)/bs;
468 #endif
469             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
470               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
471               col =  in[j];
472             }
473           }
474           else col = in[j];
475           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
476         }
477       }
478     } else {
479       if (!baij->donotstash) {
480         if (roworiented) {
481           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
482         } else {
483           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
484         }
485       }
486     }
487   }
488   PetscFunctionReturn(0);
489 }
490 
491 #define HASH_KEY 0.6180339887
492 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
493 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
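/*
   HASH() is multiplicative hashing: HASH_KEY is the fractional part of the golden
   ratio, and HASH(size,key,tmp) = floor(size * frac(key*HASH_KEY)).  Worked
   example (illustrative): size=11, key=7 gives 7*0.6180339887 = 4.3262...,
   fractional part 0.3262..., times 11 = 3.58..., so slot 3.  Collisions are
   resolved in the routines below by linear probing with wraparound.
*/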
494 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
495 #undef __FUNCT__
496 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
497 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
498 {
499   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
500   PetscTruth     roworiented = baij->roworiented;
501   PetscErrorCode ierr;
502   PetscInt       i,j,row,col;
503   PetscInt       rstart_orig=mat->rmap.rstart;
504   PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
505   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
506   PetscReal      tmp;
507   MatScalar      **HD = baij->hd,value;
508 #if defined(PETSC_USE_DEBUG)
509   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
510 #endif
511 
512   PetscFunctionBegin;
513 
514   for (i=0; i<m; i++) {
515 #if defined(PETSC_USE_DEBUG)
516     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
517     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
518 #endif
519     row = im[i];
520     if (row >= rstart_orig && row < rend_orig) {
521       for (j=0; j<n; j++) {
522         col = in[j];
523         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
524         /* Look up in the Hash Table */
525         key = (row/bs)*Nbs+(col/bs)+1;
526         h1  = HASH(size,key,tmp);
527 
528 
529         idx = h1;
530 #if defined(PETSC_USE_DEBUG)
531         insert_ct++;
532         total_ct++;
533         if (HT[idx] != key) {
534           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
535           if (idx == size) {
536             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
537             if (idx == h1) {
538               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
539             }
540           }
541         }
542 #else
543         if (HT[idx] != key) {
544           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
545           if (idx == size) {
546             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
547             if (idx == h1) {
548               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
549             }
550           }
551         }
552 #endif
553         /* A HASH table entry is found, so insert the values at the correct address */
554         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
555         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
556       }
557     } else {
558       if (!baij->donotstash) {
559         if (roworiented) {
560           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
561         } else {
562           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
563         }
564       }
565     }
566   }
567 #if defined(PETSC_USE_DEBUG)
568   baij->ht_total_ct = total_ct;
569   baij->ht_insert_ct = insert_ct;
570 #endif
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
576 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
577 {
578   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
579   PetscTruth      roworiented = baij->roworiented;
580   PetscErrorCode  ierr;
581   PetscInt        i,j,ii,jj,row,col;
582   PetscInt        rstart=baij->rstartbs;
583   PetscInt        rend=baij->rendbs,stepval,bs=mat->rmap.bs,bs2=baij->bs2;
584   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
585   PetscReal       tmp;
586   MatScalar       **HD = baij->hd,*baij_a;
587   const MatScalar *v_t,*value;
588 #if defined(PETSC_USE_DEBUG)
589   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
590 #endif
591 
592   PetscFunctionBegin;
593 
594   if (roworiented) {
595     stepval = (n-1)*bs;
596   } else {
597     stepval = (m-1)*bs;
598   }
599   for (i=0; i<m; i++) {
600 #if defined(PETSC_USE_DEBUG)
601     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
602     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
603 #endif
604     row   = im[i];
605     v_t   = v + i*bs2;
606     if (row >= rstart && row < rend) {
607       for (j=0; j<n; j++) {
608         col = in[j];
609 
610         /* Look up into the Hash Table */
611         key = row*Nbs+col+1;
612         h1  = HASH(size,key,tmp);
613 
614         idx = h1;
615 #if defined(PETSC_USE_DEBUG)
616         total_ct++;
617         insert_ct++;
618         if (HT[idx] != key) {
619           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
620           if (idx == size) {
621             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
622             if (idx == h1) {
623               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
624             }
625           }
626         }
627 #else
628         if (HT[idx] != key) {
629           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
630           if (idx == size) {
631             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
632             if (idx == h1) {
633               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
634             }
635           }
636         }
637 #endif
638         baij_a = HD[idx];
639         if (roworiented) {
640           /*value = v + i*(stepval+bs)*bs + j*bs;*/
641           /* value = v + (i*(stepval+bs)+j)*bs; */
642           value = v_t;
643           v_t  += bs;
644           if (addv == ADD_VALUES) {
645             for (ii=0; ii<bs; ii++,value+=stepval) {
646               for (jj=ii; jj<bs2; jj+=bs) {
647                 baij_a[jj]  += *value++;
648               }
649             }
650           } else {
651             for (ii=0; ii<bs; ii++,value+=stepval) {
652               for (jj=ii; jj<bs2; jj+=bs) {
653                 baij_a[jj]  = *value++;
654               }
655             }
656           }
657         } else {
658           value = v + j*(stepval+bs)*bs + i*bs;
659           if (addv == ADD_VALUES) {
660             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
661               for (jj=0; jj<bs; jj++) {
662                 baij_a[jj]  += *value++;
663               }
664             }
665           } else {
666             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
667               for (jj=0; jj<bs; jj++) {
668                 baij_a[jj]  = *value++;
669               }
670             }
671           }
672         }
673       }
674     } else {
675       if (!baij->donotstash) {
676         if (roworiented) {
677           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
678         } else {
679           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
680         }
681       }
682     }
683   }
684 #if defined(PETSC_USE_DEBUG)
685   baij->ht_total_ct = total_ct;
686   baij->ht_insert_ct = insert_ct;
687 #endif
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatGetValues_MPIBAIJ"
693 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
694 {
695   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
696   PetscErrorCode ierr;
697   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
698   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
699 
700   PetscFunctionBegin;
701   for (i=0; i<m; i++) {
702     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
703     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
704     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
705       row = idxm[i] - bsrstart;
706       for (j=0; j<n; j++) {
707         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
708         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
709         if (idxn[j] >= bscstart && idxn[j] < bscend){
710           col = idxn[j] - bscstart;
711           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
712         } else {
713           if (!baij->colmap) {
714             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
715           }
716 #if defined (PETSC_USE_CTABLE)
717           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
718           data--;
719 #else
720           data = baij->colmap[idxn[j]/bs]-1;
721 #endif
722           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
723           else {
724             col  = data + idxn[j]%bs;
725             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
726           }
727         }
728       }
729     } else {
730       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
731     }
732   }
733   PetscFunctionReturn(0);
734 }
735 
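/*
   MatNorm below computes, with one collective reduction per norm:
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2); local partial sums combined with MPI_SUM
     NORM_1:         max_j sum_i |a_ij| (largest column sum); per-column sums are
                     combined with MPI_SUM and the max is taken redundantly everywhere
     NORM_INFINITY:  max_i sum_j |a_ij| (largest row sum); local row-sum maxima are
                     combined with MPI_MAX
*/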
736 #undef __FUNCT__
737 #define __FUNCT__ "MatNorm_MPIBAIJ"
738 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
739 {
740   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
741   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
742   PetscErrorCode ierr;
743   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
744   PetscReal      sum = 0.0;
745   MatScalar      *v;
746 
747   PetscFunctionBegin;
748   if (baij->size == 1) {
749     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
750   } else {
751     if (type == NORM_FROBENIUS) {
752       v = amat->a;
753       nz = amat->nz*bs2;
754       for (i=0; i<nz; i++) {
755 #if defined(PETSC_USE_COMPLEX)
756         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
757 #else
758         sum += (*v)*(*v); v++;
759 #endif
760       }
761       v = bmat->a;
762       nz = bmat->nz*bs2;
763       for (i=0; i<nz; i++) {
764 #if defined(PETSC_USE_COMPLEX)
765         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
766 #else
767         sum += (*v)*(*v); v++;
768 #endif
769       }
770       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
771       *nrm = sqrt(*nrm);
772     } else if (type == NORM_1) { /* max column sum */
773       PetscReal *tmp,*tmp2;
774       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
775       ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
776       tmp2 = tmp + mat->cmap.N;
777       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
778       v = amat->a; jj = amat->j;
779       for (i=0; i<amat->nz; i++) {
780         for (j=0; j<bs; j++){
781           col = bs*(cstart + *jj) + j; /* column index */
782           for (row=0; row<bs; row++){
783             tmp[col] += PetscAbsScalar(*v);  v++;
784           }
785         }
786         jj++;
787       }
788       v = bmat->a; jj = bmat->j;
789       for (i=0; i<bmat->nz; i++) {
790         for (j=0; j<bs; j++){
791           col = bs*garray[*jj] + j;
792           for (row=0; row<bs; row++){
793             tmp[col] += PetscAbsScalar(*v); v++;
794           }
795         }
796         jj++;
797       }
798       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
799       *nrm = 0.0;
800       for (j=0; j<mat->cmap.N; j++) {
801         if (tmp2[j] > *nrm) *nrm = tmp2[j];
802       }
803       ierr = PetscFree(tmp);CHKERRQ(ierr);
804     } else if (type == NORM_INFINITY) { /* max row sum */
805       PetscReal *sums;
806       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
807       sum = 0.0;
808       for (j=0; j<amat->mbs; j++) {
809         for (row=0; row<bs; row++) sums[row] = 0.0;
810         v = amat->a + bs2*amat->i[j];
811         nz = amat->i[j+1]-amat->i[j];
812         for (i=0; i<nz; i++) {
813           for (col=0; col<bs; col++){
814             for (row=0; row<bs; row++){
815               sums[row] += PetscAbsScalar(*v); v++;
816             }
817           }
818         }
819         v = bmat->a + bs2*bmat->i[j];
820         nz = bmat->i[j+1]-bmat->i[j];
821         for (i=0; i<nz; i++) {
822           for (col=0; col<bs; col++){
823             for (row=0; row<bs; row++){
824               sums[row] += PetscAbsScalar(*v); v++;
825             }
826           }
827         }
828         for (row=0; row<bs; row++){
829           if (sums[row] > sum) sum = sums[row];
830         }
831       }
832       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
833       ierr = PetscFree(sums);CHKERRQ(ierr);
834     } else {
835       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
836     }
837   }
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   Creates the hash table and fills it with the nonzero block locations.
843   This table is created only once.
844   If new entries need to be added to the matrix
845   then the hash table has to be destroyed and
846   recreated.
847 */
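/*
   Table layout, as built below: HT[i] holds the key row*Nbs + col + 1 of a block
   (the +1 lets zero mark an empty slot) and HD[i] points at the corresponding
   bs2-long block of values inside a->a or b->a.  Insertion probes linearly from
   HASH(size,key,tmp), wrapping modulo size, until an empty slot is found.
*/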
848 #undef __FUNCT__
849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
850 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
851 {
852   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
853   Mat            A = baij->A,B=baij->B;
854   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
855   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
856   PetscErrorCode ierr;
857   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
858   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
859   PetscInt       *HT,key;
860   MatScalar      **HD;
861   PetscReal      tmp;
862 #if defined(PETSC_USE_INFO)
863   PetscInt       ct=0,max=0;
864 #endif
865 
866   PetscFunctionBegin;
867   if (baij->ht) {
868     PetscFunctionReturn(0);
869   }
870 
871   baij->ht_size = (PetscInt)(factor*nz);
872   size          = baij->ht_size;
873 
874   /* Allocate Memory for Hash Table */
875   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
876   baij->ht = (PetscInt*)(baij->hd + size);
877   HD       = baij->hd;
878   HT       = baij->ht;
879 
880 
881   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);
882 
883 
884   /* Loop Over A */
885   for (i=0; i<a->mbs; i++) {
886     for (j=ai[i]; j<ai[i+1]; j++) {
887       row = i+rstart;
888       col = aj[j]+cstart;
889 
890       key = row*Nbs + col + 1;
891       h1  = HASH(size,key,tmp);
892       for (k=0; k<size; k++){
893         if (!HT[(h1+k)%size]) {
894           HT[(h1+k)%size] = key;
895           HD[(h1+k)%size] = a->a + j*bs2;
896           break;
897 #if defined(PETSC_USE_INFO)
898         } else {
899           ct++;
900 #endif
901         }
902       }
903 #if defined(PETSC_USE_INFO)
904       if (k > max) max = k;
905 #endif
906     }
907   }
908   /* Loop Over B */
909   for (i=0; i<b->mbs; i++) {
910     for (j=bi[i]; j<bi[i+1]; j++) {
911       row = i+rstart;
912       col = garray[bj[j]];
913       key = row*Nbs + col + 1;
914       h1  = HASH(size,key,tmp);
915       for (k=0; k<size; k++){
916         if (!HT[(h1+k)%size]) {
917           HT[(h1+k)%size] = key;
918           HD[(h1+k)%size] = b->a + j*bs2;
919           break;
920 #if defined(PETSC_USE_INFO)
921         } else {
922           ct++;
923 #endif
924         }
925       }
926 #if defined(PETSC_USE_INFO)
927       if (k > max) max = k;
928 #endif
929     }
930   }
931 
932   /* Print Summary */
933 #if defined(PETSC_USE_INFO)
934   for (i=0,j=0; i<size; i++) {
935     if (HT[i]) {j++;}
936   }
937   ierr = PetscInfo2(0,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
938 #endif
939   PetscFunctionReturn(0);
940 }
941 
942 #undef __FUNCT__
943 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
944 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
945 {
946   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
947   PetscErrorCode ierr;
948   PetscInt       nstash,reallocs;
949   InsertMode     addv;
950 
951   PetscFunctionBegin;
952   if (baij->donotstash) {
953     PetscFunctionReturn(0);
954   }
955 
956   /* make sure all processors are either in INSERTMODE or ADDMODE */
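  /* INSERT_VALUES and ADD_VALUES have distinct bit patterns, so the bitwise-OR
     reduction below equals ADD_VALUES|INSERT_VALUES exactly when different
     processors used different modes */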
957   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
958   if (addv == (ADD_VALUES|INSERT_VALUES)) {
959     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
960   }
961   mat->insertmode = addv; /* in case this processor had no cache */
962 
963   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
964   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
965   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
966   ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
967   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
968   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
974 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
975 {
976   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
977   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
978   PetscErrorCode ierr;
979   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
980   PetscInt       *row,*col,other_disassembled;
981   PetscTruth     r1,r2,r3;
982   MatScalar      *val;
983   InsertMode     addv = mat->insertmode;
984   PetscMPIInt    n;
985 
986   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
987   PetscFunctionBegin;
988   if (!baij->donotstash) {
989     while (1) {
990       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
991       if (!flg) break;
992 
993       for (i=0; i<n;) {
994         /* Now identify the consecutive vals belonging to the same row */
995         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
996         if (j < n) ncols = j-i;
997         else       ncols = n-i;
998         /* Now assemble all these values with a single function call */
999         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1000         i = j;
1001       }
1002     }
1003     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1004     /* Now process the block-stash. Since the values are stashed column-oriented,
1005        temporarily set the roworiented flags to PETSC_FALSE, and after the
1006        MatSetValues() calls restore the original flags */
1007     r1 = baij->roworiented;
1008     r2 = a->roworiented;
1009     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
1010     baij->roworiented = PETSC_FALSE;
1011     a->roworiented    = PETSC_FALSE;
1012     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
1013     while (1) {
1014       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1015       if (!flg) break;
1016 
1017       for (i=0; i<n;) {
1018         /* Now identify the consecutive vals belonging to the same row */
1019         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1020         if (j < n) ncols = j-i;
1021         else       ncols = n-i;
1022         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1023         i = j;
1024       }
1025     }
1026     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1027     baij->roworiented = r1;
1028     a->roworiented    = r2;
1029     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
1030   }
1031 
1032   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1033   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1034 
1035   /* determine if any processor has disassembled, if so we must
1036      also disassemble ourselves, in order that we may reassemble. */
1037   /*
1038      if nonzero structure of submatrix B cannot change then we know that
1039      no processor disassembled thus we can skip this stuff
1040   */
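  /*
     The product reduction below is nonzero only if mat->was_assembled is true on
     every process; a process that has disassembled contributes a zero.  Hence if
     this process was assembled but the product is zero, some other process
     disassembled, and we must disassemble as well so that all processes rebuild
     a consistent parallel structure.
  */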
1041   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1042     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1043     if (mat->was_assembled && !other_disassembled) {
1044       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1045     }
1046   }
1047 
1048   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1049     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1050   }
1051   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
1052   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1053   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1054 
1055 #if defined(PETSC_USE_INFO)
1056   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1057     ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
1058     baij->ht_total_ct  = 0;
1059     baij->ht_insert_ct = 0;
1060   }
1061 #endif
1062   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1063     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1064     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1065     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1066   }
1067 
1068   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1069   baij->rowvalues = 0;
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1075 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1076 {
1077   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1078   PetscErrorCode    ierr;
1079   PetscMPIInt       size = baij->size,rank = baij->rank;
1080   PetscInt          bs = mat->rmap.bs;
1081   PetscTruth        iascii,isdraw;
1082   PetscViewer       sviewer;
1083   PetscViewerFormat format;
1084 
1085   PetscFunctionBegin;
1087   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1088   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1089   if (iascii) {
1090     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1091     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1092       MatInfo info;
1093       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1094       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1095       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1096               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1097               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
1098       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1099       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1100       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1101       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1102       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1103       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1104       PetscFunctionReturn(0);
1105     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1106       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1107       PetscFunctionReturn(0);
1108     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1109       PetscFunctionReturn(0);
1110     }
1111   }
1112 
1113   if (isdraw) {
1114     PetscDraw  draw;
1115     PetscTruth isnull;
1116     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1117     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1118   }
1119 
1120   if (size == 1) {
1121     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1122     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1123   } else {
1124     /* assemble the entire matrix onto first processor. */
1125     Mat         A;
1126     Mat_SeqBAIJ *Aloc;
1127     PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1128     MatScalar   *a;
1129 
1130     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1131     /* Perhaps this should be the type of mat? */
1132     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
1133     if (!rank) {
1134       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1135     } else {
1136       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1137     }
1138     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1139     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1140     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1141 
1142     /* copy over the A part */
1143     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1144     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1145     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1146 
1147     for (i=0; i<mbs; i++) {
1148       rvals[0] = bs*(baij->rstartbs + i);
1149       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1150       for (j=ai[i]; j<ai[i+1]; j++) {
1151         col = (baij->cstartbs+aj[j])*bs;
1152         for (k=0; k<bs; k++) {
1153           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1154           col++; a += bs;
1155         }
1156       }
1157     }
1158     /* copy over the B part */
1159     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1160     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1161     for (i=0; i<mbs; i++) {
1162       rvals[0] = bs*(baij->rstartbs + i);
1163       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1164       for (j=ai[i]; j<ai[i+1]; j++) {
1165         col = baij->garray[aj[j]]*bs;
1166         for (k=0; k<bs; k++) {
1167           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1168           col++; a += bs;
1169         }
1170       }
1171     }
1172     ierr = PetscFree(rvals);CHKERRQ(ierr);
1173     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1174     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1175     /*
1176        Everyone has to call to draw the matrix since the graphics waits are
1177        synchronized across all processors that share the PetscDraw object
1178     */
1179     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1180     if (!rank) {
1181       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1182       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1183     }
1184     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1185     ierr = MatDestroy(A);CHKERRQ(ierr);
1186   }
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatView_MPIBAIJ"
1192 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1193 {
1194   PetscErrorCode ierr;
1195   PetscTruth     iascii,isdraw,issocket,isbinary;
1196 
1197   PetscFunctionBegin;
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1200   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1201   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1202   if (iascii || isdraw || issocket || isbinary) {
1203     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1204   } else {
1205     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1206   }
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1212 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1213 {
1214   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218 #if defined(PETSC_USE_LOG)
1219   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
1220 #endif
1221   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1222   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1223   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1224   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1225 #if defined (PETSC_USE_CTABLE)
1226   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1227 #else
1228   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1229 #endif
1230   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1231   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1232   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1233   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1234   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1235   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1236 #if defined(PETSC_USE_MAT_SINGLE)
1237   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1238 #endif
1239   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1240   ierr = PetscFree(baij);CHKERRQ(ierr);
1241 
1242   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
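/*
   MatMult below uses the standard communication/computation overlap: start the
   scatter of the needed off-process entries of xx into a->lvec, multiply by the
   local diagonal block while the messages are in flight, then complete the
   scatter and accumulate the off-diagonal contribution, i.e.

       yy = a->A * xx_local + a->B * xx_ghost
*/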
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatMult_MPIBAIJ"
1254 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1255 {
1256   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1257   PetscErrorCode ierr;
1258   PetscInt       nt;
1259 
1260   PetscFunctionBegin;
1261   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1262   if (nt != A->cmap.n) {
1263     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1264   }
1265   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1266   if (nt != A->rmap.n) {
1267     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1268   }
1269   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1270   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1271   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1272   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1273   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1279 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1280 {
1281   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1282   PetscErrorCode ierr;
1283 
1284   PetscFunctionBegin;
1285   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1286   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1287   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1288   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1294 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1295 {
1296   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1297   PetscErrorCode ierr;
1298   PetscTruth     merged;
1299 
1300   PetscFunctionBegin;
1301   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1302   /* do nondiagonal part */
1303   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1304   if (!merged) {
1305     /* send it on its way */
1306     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1307     /* do local part */
1308     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1309     /* receive remote parts: note this assumes the values are not actually */
1310     /* inserted in yy until the next line */
1311     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1312   } else {
1313     /* do local part */
1314     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1315     /* send it on its way */
1316     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1317     /* values actually were received in the Begin() but we need to call this nop */
1318     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1319   }
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1325 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1326 {
1327   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1328   PetscErrorCode ierr;
1329 
1330   PetscFunctionBegin;
1331   /* do nondiagonal part */
1332   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1333   /* send it on its way */
1334   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1335   /* do local part */
1336   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1337   /* receive remote parts: note this assumes the values are not actually
1338      inserted in zz until the VecScatterEnd() below, which is true for the
1339      current implementation but is not guaranteed in general. */
1340   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 /*
1345   This only works correctly for square matrices where the subblock a->A is the
1346    diagonal block
1347 */
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1350 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1351 {
1352   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1353   PetscErrorCode ierr;
1354 
1355   PetscFunctionBegin;
1356   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1357   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatScale_MPIBAIJ"
1363 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1364 {
1365   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1366   PetscErrorCode ierr;
1367 
1368   PetscFunctionBegin;
1369   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1370   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
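/*
   MatGetRow below reconstructs one global row from the two local pieces: the row
   is fetched from both mat->A and mat->B, B's block-column indices are translated
   to global indices through garray, and the two already-sorted lists are merged so
   that the B entries left of the diagonal block come first, then the A entries,
   then the remaining B entries.
*/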
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1376 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1377 {
1378   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1379   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1380   PetscErrorCode ierr;
1381   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1382   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1383   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1384 
1385   PetscFunctionBegin;
1386   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1387   mat->getrowactive = PETSC_TRUE;
1388 
1389   if (!mat->rowvalues && (idx || v)) {
1390     /*
1391         allocate enough space to hold information from the longest row.
1392     */
1393     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1394     PetscInt     max = 1,mbs = mat->mbs,tmp;
1395     for (i=0; i<mbs; i++) {
1396       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1397       if (max < tmp) { max = tmp; }
1398     }
1399     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1400     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1401   }
1402 
1403   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1404   lrow = row - brstart;
1405 
1406   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1407   if (!v)   {pvA = 0; pvB = 0;}
1408   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1409   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1410   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1411   nztot = nzA + nzB;
1412 
1413   cmap  = mat->garray;
1414   if (v  || idx) {
1415     if (nztot) {
1416       /* Sort by increasing column numbers, assuming A and B already sorted */
1417       PetscInt imark = -1;
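      /* imark will mark how many of B's entries have a global block column before the diagonal block;
         in the merged, column-sorted row those come first, then A's entries, then B's remaining entries */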
1418       if (v) {
1419         *v = v_p = mat->rowvalues;
1420         for (i=0; i<nzB; i++) {
1421           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1422           else break;
1423         }
1424         imark = i;
1425         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1426         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1427       }
1428       if (idx) {
1429         *idx = idx_p = mat->rowindices;
1430         if (imark > -1) {
1431           for (i=0; i<imark; i++) {
1432             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1433           }
1434         } else {
1435           for (i=0; i<nzB; i++) {
1436             if (cmap[cworkB[i]/bs] < cstart)
1437               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1438             else break;
1439           }
1440           imark = i;
1441         }
1442         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1443         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1444       }
1445     } else {
1446       if (idx) *idx = 0;
1447       if (v)   *v   = 0;
1448     }
1449   }
1450   *nz = nztot;
1451   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1452   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1453   PetscFunctionReturn(0);
1454 }
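
/*
   A minimal usage sketch (illustrative, not part of this file): loop over the
   locally owned rows, pairing each MatGetRow() with a MatRestoreRow(), since
   only one row may be active at a time (the getrowactive check above).

      PetscInt    row,rstart,rend,nz,*cols;
      PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
        ... use nz, cols[], vals[] ...
        ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
      }
*/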
1455 
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1458 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1459 {
1460   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1461 
1462   PetscFunctionBegin;
1463   if (!baij->getrowactive) {
1464     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1465   }
1466   baij->getrowactive = PETSC_FALSE;
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1472 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1473 {
1474   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1475   PetscErrorCode ierr;
1476 
1477   PetscFunctionBegin;
1478   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1479   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1485 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1486 {
1487   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1488   Mat            A = a->A,B = a->B;
1489   PetscErrorCode ierr;
1490   PetscReal      isend[5],irecv[5];
1491 
1492   PetscFunctionBegin;
1493   info->block_size     = (PetscReal)matin->rmap.bs;
1494   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1495   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1496   isend[3] = info->memory;  isend[4] = info->mallocs;
1497   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1498   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1499   isend[3] += info->memory;  isend[4] += info->mallocs;
1500   if (flag == MAT_LOCAL) {
1501     info->nz_used      = isend[0];
1502     info->nz_allocated = isend[1];
1503     info->nz_unneeded  = isend[2];
1504     info->memory       = isend[3];
1505     info->mallocs      = isend[4];
1506   } else if (flag == MAT_GLOBAL_MAX) {
1507     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1508     info->nz_used      = irecv[0];
1509     info->nz_allocated = irecv[1];
1510     info->nz_unneeded  = irecv[2];
1511     info->memory       = irecv[3];
1512     info->mallocs      = irecv[4];
1513   } else if (flag == MAT_GLOBAL_SUM) {
1514     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1515     info->nz_used      = irecv[0];
1516     info->nz_allocated = irecv[1];
1517     info->nz_unneeded  = irecv[2];
1518     info->memory       = irecv[3];
1519     info->mallocs      = irecv[4];
1520   } else {
1521     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1522   }
1523   info->rows_global       = (PetscReal)matin->rmap.N;
1524   info->columns_global    = (PetscReal)matin->cmap.N;
1525   info->rows_local        = (PetscReal)matin->rmap.n;
1526   info->columns_local     = (PetscReal)matin->cmap.n;
1527   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1528   info->fill_ratio_needed = 0;
1529   info->factor_mallocs    = 0;
1530   PetscFunctionReturn(0);
1531 }
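
/*
   A usage sketch (illustrative): gather the storage statistics summed over all
   processes of an assembled matrix A.

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/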
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1535 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1536 {
1537   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1538   PetscErrorCode ierr;
1539 
1540   PetscFunctionBegin;
1541   switch (op) {
1542   case MAT_NO_NEW_NONZERO_LOCATIONS:
1543   case MAT_YES_NEW_NONZERO_LOCATIONS:
1544   case MAT_COLUMNS_UNSORTED:
1545   case MAT_COLUMNS_SORTED:
1546   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1547   case MAT_KEEP_ZEROED_ROWS:
1548   case MAT_NEW_NONZERO_LOCATION_ERR:
1549     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1550     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1551     break;
1552   case MAT_ROW_ORIENTED:
1553     a->roworiented = PETSC_TRUE;
1554     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1555     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1556     break;
1557   case MAT_ROWS_SORTED:
1558   case MAT_ROWS_UNSORTED:
1559   case MAT_YES_NEW_DIAGONALS:
1560     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1561     break;
1562   case MAT_COLUMN_ORIENTED:
1563     a->roworiented = PETSC_FALSE;
1564     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1565     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1566     break;
1567   case MAT_IGNORE_OFF_PROC_ENTRIES:
1568     a->donotstash = PETSC_TRUE;
1569     break;
1570   case MAT_NO_NEW_DIAGONALS:
1571     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1572   case MAT_USE_HASH_TABLE:
1573     a->ht_flag = PETSC_TRUE;
1574     break;
1575   case MAT_SYMMETRIC:
1576   case MAT_STRUCTURALLY_SYMMETRIC:
1577   case MAT_HERMITIAN:
1578   case MAT_SYMMETRY_ETERNAL:
1579     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1580     break;
1581   case MAT_NOT_SYMMETRIC:
1582   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1583   case MAT_NOT_HERMITIAN:
1584   case MAT_NOT_SYMMETRY_ETERNAL:
1585     break;
1586   default:
1587     SETERRQ(PETSC_ERR_SUP,"unknown option");
1588   }
1589   PetscFunctionReturn(0);
1590 }
1591 
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1594 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1595 {
1596   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1597   Mat_SeqBAIJ    *Aloc;
1598   Mat            B;
1599   PetscErrorCode ierr;
1600   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1601   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1602   MatScalar      *a;
1603 
1604   PetscFunctionBegin;
1605   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1606   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1607   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1608   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1609   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1610 
1611   /* copy over the A part */
1612   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1613   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1614   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1615 
1616   for (i=0; i<mbs; i++) {
1617     rvals[0] = bs*(baij->rstartbs + i);
1618     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1619     for (j=ai[i]; j<ai[i+1]; j++) {
1620       col = (baij->cstartbs+aj[j])*bs;
1621       for (k=0; k<bs; k++) {
1622         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1623         col++; a += bs;
1624       }
1625     }
1626   }
1627   /* copy over the B part */
1628   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1629   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1630   for (i=0; i<mbs; i++) {
1631     rvals[0] = bs*(baij->rstartbs + i);
1632     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1633     for (j=ai[i]; j<ai[i+1]; j++) {
1634       col = baij->garray[aj[j]]*bs;
1635       for (k=0; k<bs; k++) {
1636         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1637         col++; a += bs;
1638       }
1639     }
1640   }
1641   ierr = PetscFree(rvals);CHKERRQ(ierr);
1642   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644 
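  /* hand back the explicit transpose, or, in the in-place (square) case, replace A's contents with B's via the header copy */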
1645   if (matout) {
1646     *matout = B;
1647   } else {
1648     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1649   }
1650   PetscFunctionReturn(0);
1651 }
1652 
1653 #undef __FUNCT__
1654 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1655 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1656 {
1657   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1658   Mat            a = baij->A,b = baij->B;
1659   PetscErrorCode ierr;
1660   PetscInt       s1,s2,s3;
1661 
1662   PetscFunctionBegin;
1663   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1664   if (rr) {
1665     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1666     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1667     /* Overlap communication with computation. */
1668     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1669   }
1670   if (ll) {
1671     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1672     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1673     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1674   }
1675   /* scale the diagonal block */
1676   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1677 
1678   if (rr) {
1679     /* Do a scatter end and then right scale the off-diagonal block */
1680     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1681     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1682   }
1683 
1684   PetscFunctionReturn(0);
1685 }
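
/*
   Usage sketch: MatDiagonalScale(mat,ll,rr) forms diag(ll)*mat*diag(rr); pass
   PETSC_NULL for either vector to scale on one side only, e.g.

      ierr = MatDiagonalScale(mat,ll,PETSC_NULL);CHKERRQ(ierr);   left scaling only
*/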
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1689 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1690 {
1691   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1692   PetscErrorCode ierr;
1693   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1694   PetscInt       i,*owners = A->rmap.range;
1695   PetscInt       *nprocs,j,idx,nsends,row;
1696   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1697   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1698   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1699   MPI_Comm       comm = A->comm;
1700   MPI_Request    *send_waits,*recv_waits;
1701   MPI_Status     recv_status,*send_status;
1702 #if defined(PETSC_DEBUG)
1703   PetscTruth     found = PETSC_FALSE;
1704 #endif
1705 
1706   PetscFunctionBegin;
1707   /*  first count number of contributors to each processor */
1708   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1709   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1710   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* owner[i] = rank that owns rows[i] */
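  /* nprocs[2*j] counts the rows headed for process j; nprocs[2*j+1] is a 0/1 flag recording that a message to j is needed */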
1711   j = 0;
1712   for (i=0; i<N; i++) {
1713     if (lastidx > (idx = rows[i])) j = 0;
1714     lastidx = idx;
1715     for (; j<size; j++) {
1716       if (idx >= owners[j] && idx < owners[j+1]) {
1717         nprocs[2*j]++;
1718         nprocs[2*j+1] = 1;
1719         owner[i] = j;
1720 #if defined(PETSC_DEBUG)
1721         found = PETSC_TRUE;
1722 #endif
1723         break;
1724       }
1725     }
1726 #if defined(PETSC_DEBUG)
1727     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1728     found = PETSC_FALSE;
1729 #endif
1730   }
1731   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1732 
1733   /* inform other processors of number of messages and max length*/
1734   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1735 
1736   /* post receives:   */
1737   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1738   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1739   for (i=0; i<nrecvs; i++) {
1740     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1741   }
1742 
1743   /* do sends:
1744      1) starts[i] gives the starting index in svalues for stuff going to
1745      the ith processor
1746   */
1747   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1748   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1749   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1750   starts[0]  = 0;
1751   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1752   for (i=0; i<N; i++) {
1753     svalues[starts[owner[i]]++] = rows[i];
1754   }
1755 
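  /* the packing loop above advanced each starts[i] to the end of its range, so rebuild the offsets before posting the sends */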
1756   starts[0] = 0;
1757   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1758   count = 0;
1759   for (i=0; i<size; i++) {
1760     if (nprocs[2*i+1]) {
1761       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1762     }
1763   }
1764   ierr = PetscFree(starts);CHKERRQ(ierr);
1765 
1766   base = owners[rank];
1767 
1768   /*  wait on receives */
1769   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1770   source = lens + nrecvs;
1771   count  = nrecvs; slen = 0;
1772   while (count) {
1773     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1774     /* unpack receives into our local space */
1775     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1776     source[imdex]  = recv_status.MPI_SOURCE;
1777     lens[imdex]    = n;
1778     slen          += n;
1779     count--;
1780   }
1781   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1782 
1783   /* move the data into the send scatter */
1784   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1785   count = 0;
1786   for (i=0; i<nrecvs; i++) {
1787     values = rvalues + i*nmax;
1788     for (j=0; j<lens[i]; j++) {
1789       lrows[count++] = values[j] - base;
1790     }
1791   }
1792   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1793   ierr = PetscFree(lens);CHKERRQ(ierr);
1794   ierr = PetscFree(owner);CHKERRQ(ierr);
1795   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1796 
1797   /* actually zap the local rows */
1798   /*
1799         Zero the required rows. If the "diagonal block" of the matrix
1800      is square and the user wishes to set the diagonal we use separate
1801      code so that MatSetValues() is not called for each diagonal allocating
1802      new memory, thus calling lots of mallocs and slowing things down.
1803 
1804        Contributed by: Matthew Knepley
1805   */
1806   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1807   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1808   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1809     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1810   } else if (diag != 0.0) {
1811     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1812     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1813       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1814 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1815     }
1816     for (i=0; i<slen; i++) {
1817       row  = lrows[i] + rstart_bs;
1818       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1819     }
1820     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1821     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1822   } else {
1823     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1824   }
1825 
1826   ierr = PetscFree(lrows);CHKERRQ(ierr);
1827 
1828   /* wait on sends */
1829   if (nsends) {
1830     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1831     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1832     ierr = PetscFree(send_status);CHKERRQ(ierr);
1833   }
1834   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1835   ierr = PetscFree(svalues);CHKERRQ(ierr);
1836 
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1842 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1843 {
1844   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1845   MPI_Comm          comm = A->comm;
1846   static PetscTruth called = PETSC_FALSE;
1847   PetscErrorCode    ierr;
1848 
1849   PetscFunctionBegin;
1850   if (!a->rank) {
1851     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1852   }
1853   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1854   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1855   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1861 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1862 {
1863   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1864   PetscErrorCode ierr;
1865 
1866   PetscFunctionBegin;
1867   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatEqual_MPIBAIJ"
1875 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1876 {
1877   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1878   Mat            a,b,c,d;
1879   PetscTruth     flg;
1880   PetscErrorCode ierr;
1881 
1882   PetscFunctionBegin;
1883   a = matA->A; b = matA->B;
1884   c = matB->A; d = matB->B;
1885 
1886   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1887   if (flg) {
1888     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1889   }
1890   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatCopy_MPIBAIJ"
1896 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1897 {
1898   PetscErrorCode ierr;
1899   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1900   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1901 
1902   PetscFunctionBegin;
1903   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1904   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1905     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1906   } else {
1907     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1908     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1909   }
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1915 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1916 {
1917   PetscErrorCode ierr;
1918 
1919   PetscFunctionBegin;
1920   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 #include "petscblaslapack.h"
1925 #undef __FUNCT__
1926 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1927 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1928 {
1929   PetscErrorCode ierr;
1930   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1931   PetscBLASInt   bnz,one=1;
1932   Mat_SeqBAIJ    *x,*y;
1933 
1934   PetscFunctionBegin;
1935   if (str == SAME_NONZERO_PATTERN) {
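    /* matching nonzero patterns: the stored values of X and Y line up entrywise, so a raw BLAS axpy
       over the value arrays of the diagonal (A) and off-diagonal (B) parts suffices */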
1936     PetscScalar alpha = a;
1937     x = (Mat_SeqBAIJ *)xx->A->data;
1938     y = (Mat_SeqBAIJ *)yy->A->data;
1939     bnz = (PetscBLASInt)x->nz;
1940     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1941     x = (Mat_SeqBAIJ *)xx->B->data;
1942     y = (Mat_SeqBAIJ *)yy->B->data;
1943     bnz = (PetscBLASInt)x->nz;
1944     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1945   } else {
1946     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1947   }
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1953 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1954 {
1955   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1956   PetscErrorCode ierr;
1957 
1958   PetscFunctionBegin;
1959   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1960   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1966 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1967 {
1968   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1969   PetscErrorCode ierr;
1970 
1971   PetscFunctionBegin;
1972   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1973   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 /* -------------------------------------------------------------------*/
1978 static struct _MatOps MatOps_Values = {
1979        MatSetValues_MPIBAIJ,
1980        MatGetRow_MPIBAIJ,
1981        MatRestoreRow_MPIBAIJ,
1982        MatMult_MPIBAIJ,
1983 /* 4*/ MatMultAdd_MPIBAIJ,
1984        MatMultTranspose_MPIBAIJ,
1985        MatMultTransposeAdd_MPIBAIJ,
1986        0,
1987        0,
1988        0,
1989 /*10*/ 0,
1990        0,
1991        0,
1992        0,
1993        MatTranspose_MPIBAIJ,
1994 /*15*/ MatGetInfo_MPIBAIJ,
1995        MatEqual_MPIBAIJ,
1996        MatGetDiagonal_MPIBAIJ,
1997        MatDiagonalScale_MPIBAIJ,
1998        MatNorm_MPIBAIJ,
1999 /*20*/ MatAssemblyBegin_MPIBAIJ,
2000        MatAssemblyEnd_MPIBAIJ,
2001        0,
2002        MatSetOption_MPIBAIJ,
2003        MatZeroEntries_MPIBAIJ,
2004 /*25*/ MatZeroRows_MPIBAIJ,
2005        0,
2006        0,
2007        0,
2008        0,
2009 /*30*/ MatSetUpPreallocation_MPIBAIJ,
2010        0,
2011        0,
2012        0,
2013        0,
2014 /*35*/ MatDuplicate_MPIBAIJ,
2015        0,
2016        0,
2017        0,
2018        0,
2019 /*40*/ MatAXPY_MPIBAIJ,
2020        MatGetSubMatrices_MPIBAIJ,
2021        MatIncreaseOverlap_MPIBAIJ,
2022        MatGetValues_MPIBAIJ,
2023        MatCopy_MPIBAIJ,
2024 /*45*/ MatPrintHelp_MPIBAIJ,
2025        MatScale_MPIBAIJ,
2026        0,
2027        0,
2028        0,
2029 /*50*/ 0,
2030        0,
2031        0,
2032        0,
2033        0,
2034 /*55*/ 0,
2035        0,
2036        MatSetUnfactored_MPIBAIJ,
2037        0,
2038        MatSetValuesBlocked_MPIBAIJ,
2039 /*60*/ 0,
2040        MatDestroy_MPIBAIJ,
2041        MatView_MPIBAIJ,
2042        0,
2043        0,
2044 /*65*/ 0,
2045        0,
2046        0,
2047        0,
2048        0,
2049 /*70*/ MatGetRowMax_MPIBAIJ,
2050        0,
2051        0,
2052        0,
2053        0,
2054 /*75*/ 0,
2055        0,
2056        0,
2057        0,
2058        0,
2059 /*80*/ 0,
2060        0,
2061        0,
2062        0,
2063        MatLoad_MPIBAIJ,
2064 /*85*/ 0,
2065        0,
2066        0,
2067        0,
2068        0,
2069 /*90*/ 0,
2070        0,
2071        0,
2072        0,
2073        0,
2074 /*95*/ 0,
2075        0,
2076        0,
2077        0,
2078        0,
2079 /*100*/0,
2080        0,
2081        0,
2082        0,
2083        0,
2084 /*105*/0,
2085        MatRealPart_MPIBAIJ,
2086        MatImaginaryPart_MPIBAIJ};
2087 
2088 
2089 EXTERN_C_BEGIN
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2092 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2093 {
2094   PetscFunctionBegin;
2095   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2096   *iscopy = PETSC_FALSE;
2097   PetscFunctionReturn(0);
2098 }
2099 EXTERN_C_END
2100 
2101 EXTERN_C_BEGIN
2102 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2103 EXTERN_C_END
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2107 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2108 {
2109   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
2110   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2111   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2112   const PetscInt *JJ;
2113   PetscScalar    *values;
2114   PetscErrorCode ierr;
2115 
2116   PetscFunctionBegin;
2117 #if defined(PETSC_OPT_g)
2118   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2119 #endif
2120   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2121   o_nnz = d_nnz + m;
2122 
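  /* for each block row, count how many block columns land in [cstart,cend) (the diagonal block, d_nnz)
     and how many fall outside it (off-diagonal, o_nnz); J is sorted within each row */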
2123   for (i=0; i<m; i++) {
2124     nnz     = I[i+1]- I[i];
2125     JJ      = J + I[i];
2126     nnz_max = PetscMax(nnz_max,nnz);
2127 #if defined(PETSC_OPT_g)
2128     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2129 #endif
2130     for (j=0; j<nnz; j++) {
2131       if (*JJ >= cstart) break;
2132       JJ++;
2133     }
2134     d = 0;
2135     for (; j<nnz; j++) {
2136       if (*JJ++ >= cend) break;
2137       d++;
2138     }
2139     d_nnz[i] = d;
2140     o_nnz[i] = nnz - d;
2141   }
2142   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2143   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2144 
2145   if (v) values = (PetscScalar*)v;
2146   else {
2147     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2148     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2149   }
2150 
2151   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2152   for (i=0; i<m; i++) {
2153     ii   = i + rstart;
2154     nnz  = I[i+1]- I[i];
2155     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values+(v ? bs*bs*I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); /* v stores bs*bs values per block */
2156   }
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2160 
2161   if (!v) {
2162     ierr = PetscFree(values);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2169 /*@C
2170    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2171    (block compressed row) from a CSR description of the local block rows.
2172 
2173    Collective on MPI_Comm
2174 
2175    Input Parameters:
2176 +  B - the matrix
.  bs - the block size
2177 .  i - the indices into j for the start of each local block row (starts with zero)
2178 .  j - the block column indices for each local block row (starts with zero); these must be sorted within each row
2179 -  v - optional values in the matrix (bs*bs values per block)
2180 
2181    Level: developer
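   Example:
   A (hypothetical) call for a process owning two block rows with bs=2, where block
   row 0 has block columns {0,1} and block row 1 has block column {1}:
.vb
   PetscInt    i[] = {0,2,3};    /* start of each block row in j[] */
   PetscInt    j[] = {0,1,1};    /* sorted block column indices */
   PetscScalar v[3*2*2];         /* 3 blocks of bs*bs values each, or pass PETSC_NULL */
   ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,v);CHKERRQ(ierr);
.ve
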
2182 
2183 .keywords: matrix, baij, block, compressed row, sparse, parallel
2184 
2185 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2186 @*/
2187 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2188 {
2189   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2190 
2191   PetscFunctionBegin;
2192   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2193   if (f) {
2194     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2195   }
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 EXTERN_C_BEGIN
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2202 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2203 {
2204   Mat_MPIBAIJ    *b;
2205   PetscErrorCode ierr;
2206   PetscInt       i;
2207 
2208   PetscFunctionBegin;
2209   B->preallocated = PETSC_TRUE;
2210   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2211 
2212   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2213   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2214   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2215   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2216   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2217 
2218   B->rmap.bs  = bs;
2219   B->cmap.bs  = bs;
2220   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2221   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2222 
2223   if (d_nnz) {
2224     for (i=0; i<B->rmap.n/bs; i++) {
2225       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2226     }
2227   }
2228   if (o_nnz) {
2229     for (i=0; i<B->rmap.n/bs; i++) {
2230       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2231     }
2232   }
2233 
2234   b = (Mat_MPIBAIJ*)B->data;
2235   b->bs2 = bs*bs;
2236   b->mbs = B->rmap.n/bs;
2237   b->nbs = B->cmap.n/bs;
2238   b->Mbs = B->rmap.N/bs;
2239   b->Nbs = B->cmap.N/bs;
2240 
2241   for (i=0; i<=b->size; i++) {
2242     b->rangebs[i] = B->rmap.range[i]/bs;
2243   }
2244   b->rstartbs = B->rmap.rstart/bs;
2245   b->rendbs   = B->rmap.rend/bs;
2246   b->cstartbs = B->cmap.rstart/bs;
2247   b->cendbs   = B->cmap.rend/bs;
2248 
2249   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2250   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2251   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2252   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2253   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2254   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2255   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2256   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2257   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2258   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2259 
2260   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2261 
2262   PetscFunctionReturn(0);
2263 }
2264 EXTERN_C_END
2265 
2266 EXTERN_C_BEGIN
2267 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2268 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2269 EXTERN_C_END
2270 
2271 /*MC
2272    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2273 
2274    Options Database Keys:
2275 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2276 
2277   Level: beginner
2278 
2279 .seealso: MatCreateMPIBAIJ
2280 M*/
2281 
2282 EXTERN_C_BEGIN
2283 #undef __FUNCT__
2284 #define __FUNCT__ "MatCreate_MPIBAIJ"
2285 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2286 {
2287   Mat_MPIBAIJ    *b;
2288   PetscErrorCode ierr;
2289   PetscTruth     flg;
2290 
2291   PetscFunctionBegin;
2292   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2293   B->data = (void*)b;
2294 
2295 
2296   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2297   B->mapping    = 0;
2298   B->factor     = 0;
2299   B->assembled  = PETSC_FALSE;
2300 
2301   B->insertmode = NOT_SET_VALUES;
2302   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2303   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2304 
2305   /* build local table of row and column ownerships */
2306   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2307 
2308   /* build cache for off array entries formed */
2309   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2310   b->donotstash  = PETSC_FALSE;
2311   b->colmap      = PETSC_NULL;
2312   b->garray      = PETSC_NULL;
2313   b->roworiented = PETSC_TRUE;
2314 
2315 #if defined(PETSC_USE_MAT_SINGLE)
2316   /* stuff for MatSetValues_XXX in single precision */
2317   b->setvalueslen     = 0;
2318   b->setvaluescopy    = PETSC_NULL;
2319 #endif
2320 
2321   /* stuff used in block assembly */
2322   b->barray       = 0;
2323 
2324   /* stuff used for matrix vector multiply */
2325   b->lvec         = 0;
2326   b->Mvctx        = 0;
2327 
2328   /* stuff for MatGetRow() */
2329   b->rowindices   = 0;
2330   b->rowvalues    = 0;
2331   b->getrowactive = PETSC_FALSE;
2332 
2333   /* hash table stuff */
2334   b->ht           = 0;
2335   b->hd           = 0;
2336   b->ht_size      = 0;
2337   b->ht_flag      = PETSC_FALSE;
2338   b->ht_fact      = 0;
2339   b->ht_total_ct  = 0;
2340   b->ht_insert_ct = 0;
2341 
2342   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2343   if (flg) {
2344     PetscReal fact = 1.39;
2345     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2346     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2347     if (fact <= 1.0) fact = 1.39;
2348     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2349     ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2350   }
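  /* e.g. running with -mat_use_hash_table 1.6 enables hash-table assembly with factor 1.6; factors <= 1.0 fall back to the default 1.39 */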
2351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2352                                      "MatStoreValues_MPIBAIJ",
2353                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2354   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2355                                      "MatRetrieveValues_MPIBAIJ",
2356                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2357   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2358                                      "MatGetDiagonalBlock_MPIBAIJ",
2359                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2360   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2361                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2362                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2363   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2364                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2365                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2366   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2367                                      "MatDiagonalScaleLocal_MPIBAIJ",
2368                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2369   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2370                                      "MatSetHashTableFactor_MPIBAIJ",
2371                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2372   PetscFunctionReturn(0);
2373 }
2374 EXTERN_C_END
2375 
2376 /*MC
2377    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2378 
2379    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2380    and MATMPIBAIJ otherwise.
2381 
2382    Options Database Keys:
2383 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2384 
2385   Level: beginner
2386 
2387 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2388 M*/
2389 
2390 EXTERN_C_BEGIN
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatCreate_BAIJ"
2393 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2394 {
2395   PetscErrorCode ierr;
2396   PetscMPIInt    size;
2397 
2398   PetscFunctionBegin;
2399   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2400   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2401   if (size == 1) {
2402     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2403   } else {
2404     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2405   }
2406   PetscFunctionReturn(0);
2407 }
2408 EXTERN_C_END
2409 
2410 #undef __FUNCT__
2411 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2412 /*@C
2413    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2414    (block compressed row).  For good matrix assembly performance
2415    the user should preallocate the matrix storage by setting the parameters
2416    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2417    performance can be increased by more than a factor of 50.
2418 
2419    Collective on Mat
2420 
2421    Input Parameters:
2422 +  A - the matrix
2423 .  bs   - size of block
2424 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2425            submatrix  (same for all local rows)
2426 .  d_nnz - array containing the number of block nonzeros in the various block rows
2427            of the in diagonal portion of the local (possibly different for each block
2428            in the diagonal portion of the local submatrix (possibly different for each block
2429 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2430            submatrix (same for all local rows).
2431 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2432            off-diagonal portion of the local submatrix (possibly different for
2433            each block row) or PETSC_NULL.
2434 
2435    If the *_nnz parameter is given then the *_nz parameter is ignored
2436 
2437    Options Database Keys:
2438 .   -mat_no_unroll - uses code that does not unroll the loops in the
2439                      block calculations (much slower)
2440 .   -mat_block_size - size of the blocks to use
2441 
2442    Notes:
2443    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2444    then it must be used on all processors that share the object for that argument.
2445 
2446    Storage Information:
2447    For a square global matrix we define each processor's diagonal portion
2448    to be its local rows and the corresponding columns (a square submatrix);
2449    each processor's off-diagonal portion encompasses the remainder of the
2450    local matrix (a rectangular submatrix).
2451 
2452    The user can specify preallocated storage for the diagonal part of
2453    the local submatrix with either d_nz or d_nnz (not both).  Set
2454    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2455    memory allocation.  Likewise, specify preallocated storage for the
2456    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2457 
2458    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2459    the figure below we depict these three local rows and all columns (0-11).
2460 
2461 .vb
2462            0 1 2 3 4 5 6 7 8 9 10 11
2463           -------------------
2464    row 3  |  o o o d d d o o o o o o
2465    row 4  |  o o o d d d o o o o o o
2466    row 5  |  o o o d d d o o o o o o
2467           -------------------
2468 .ve
2469 
2470    Thus, any entries in the d locations are stored in the d (diagonal)
2471    submatrix, and any entries in the o locations are stored in the
2472    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2473    stored simply in the MATSEQBAIJ format for compressed row storage.
2474 
2475    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2476    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2477    In general, for PDE problems in which most nonzeros are near the diagonal,
2478    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2479    or you will get TERRIBLE performance; see the users' manual chapter on
2480    matrices.
2481 
2482    Level: intermediate
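   Example:
   For the layout pictured above with bs=3, the process owns one block row holding
   one diagonal block and three off-diagonal blocks, so one possible (hypothetical)
   preallocation is
.vb
   PetscInt d_nnz[] = {1};
   PetscInt o_nnz[] = {3};
   ierr = MatMPIBAIJSetPreallocation(A,3,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
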
2483 
2484 .keywords: matrix, block, aij, compressed row, sparse, parallel
2485 
2486 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2487 @*/
2488 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2489 {
2490   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2491 
2492   PetscFunctionBegin;
2493   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2494   if (f) {
2495     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2496   }
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatCreateMPIBAIJ"
2502 /*@C
2503    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2504    (block compressed row).  For good matrix assembly performance
2505    the user should preallocate the matrix storage by setting the parameters
2506    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2507    performance can be increased by more than a factor of 50.
2508 
2509    Collective on MPI_Comm
2510 
2511    Input Parameters:
2512 +  comm - MPI communicator
2513 .  bs   - size of block
2514 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2515            This value should be the same as the local size used in creating the
2516            y vector for the matrix-vector product y = Ax.
2517 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2518            This value should be the same as the local size used in creating the
2519            x vector for the matrix-vector product y = Ax.
2520 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2521 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2522 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2523            submatrix  (same for all local rows)
2524 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2525            of the in diagonal portion of the local (possibly different for each block
2526            in the diagonal portion of the local submatrix (possibly different for each block
2527 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2528            submatrix (same for all local rows).
2529 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2530            off-diagonal portion of the local submatrix (possibly different for
2531            each block row) or PETSC_NULL.
2532 
2533    Output Parameter:
2534 .  A - the matrix
2535 
2536    Options Database Keys:
2537 .   -mat_no_unroll - uses code that does not unroll the loops in the
2538                      block calculations (much slower)
2539 .   -mat_block_size - size of the blocks to use
2540 
2541    Notes:
2542    If the *_nnz parameter is given then the *_nz parameter is ignored
2543 
2544    A nonzero block is any block that has 1 or more nonzeros in it
2545 
2546    The user MUST specify either the local or global matrix dimensions
2547    (possibly both).
2548 
2549    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2550    then it must be used on all processors that share the object for that argument.
2551 
2552    Storage Information:
2553    For a square global matrix we define each processor's diagonal portion
2554    to be its local rows and the corresponding columns (a square submatrix);
2555    each processor's off-diagonal portion encompasses the remainder of the
2556    local matrix (a rectangular submatrix).
2557 
2558    The user can specify preallocated storage for the diagonal part of
2559    the local submatrix with either d_nz or d_nnz (not both).  Set
2560    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2561    memory allocation.  Likewise, specify preallocated storage for the
2562    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2563 
2564    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2565    the figure below we depict these three local rows and all columns (0-11).
2566 
2567 .vb
2568            0 1 2 3 4 5 6 7 8 9 10 11
2569           -------------------
2570    row 3  |  o o o d d d o o o o o o
2571    row 4  |  o o o d d d o o o o o o
2572    row 5  |  o o o d d d o o o o o o
2573           -------------------
2574 .ve
2575 
2576    Thus, any entries in the d locations are stored in the d (diagonal)
2577    submatrix, and any entries in the o locations are stored in the
2578    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2579    stored simply in the MATSEQBAIJ format for compressed row storage.
2580 
2581    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2582    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2583    In general, for PDE problems in which most nonzeros are near the diagonal,
2584    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2585    or you will get TERRIBLE performance; see the users' manual chapter on
2586    matrices.
2587 
2588    Level: intermediate
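   Example:
   A (hypothetical) creation of a 300x300 matrix with bs=3, letting PETSc choose the
   local sizes and using uniform per-block-row estimates:
.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,3,PETSC_DECIDE,PETSC_DECIDE,300,300,2,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
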
2589 
2590 .keywords: matrix, block, aij, compressed row, sparse, parallel
2591 
2592 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2593 @*/
2594 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2595 {
2596   PetscErrorCode ierr;
2597   PetscMPIInt    size;
2598 
2599   PetscFunctionBegin;
2600   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2601   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2602   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2603   if (size > 1) {
2604     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2605     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2606   } else {
2607     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2608     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2609   }
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2615 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2616 {
2617   Mat            mat;
2618   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2619   PetscErrorCode ierr;
2620   PetscInt       len=0;
2621 
2622   PetscFunctionBegin;
2623   *newmat       = 0;
2624   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2625   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2626   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2627   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2628 
2629   mat->factor       = matin->factor;
2630   mat->preallocated = PETSC_TRUE;
2631   mat->assembled    = PETSC_TRUE;
2632   mat->insertmode   = NOT_SET_VALUES;
2633 
2634   a      = (Mat_MPIBAIJ*)mat->data;
2635   mat->rmap.bs  = matin->rmap.bs;
2636   a->bs2   = oldmat->bs2;
2637   a->mbs   = oldmat->mbs;
2638   a->nbs   = oldmat->nbs;
2639   a->Mbs   = oldmat->Mbs;
2640   a->Nbs   = oldmat->Nbs;
2641 
2642   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2643   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2644 
2645   a->size         = oldmat->size;
2646   a->rank         = oldmat->rank;
2647   a->donotstash   = oldmat->donotstash;
2648   a->roworiented  = oldmat->roworiented;
2649   a->rowindices   = 0;
2650   a->rowvalues    = 0;
2651   a->getrowactive = PETSC_FALSE;
2652   a->barray       = 0;
2653   a->rstartbs     = oldmat->rstartbs;
2654   a->rendbs       = oldmat->rendbs;
2655   a->cstartbs     = oldmat->cstartbs;
2656   a->cendbs       = oldmat->cendbs;
2657 
2658   /* hash table stuff */
2659   a->ht           = 0;
2660   a->hd           = 0;
2661   a->ht_size      = 0;
2662   a->ht_flag      = oldmat->ht_flag;
2663   a->ht_fact      = oldmat->ht_fact;
2664   a->ht_total_ct  = 0;
2665   a->ht_insert_ct = 0;
2666 
2667   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2668   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2669   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2670   if (oldmat->colmap) {
2671 #if defined (PETSC_USE_CTABLE)
2672   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2673 #else
2674   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2675   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2676   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2677 #endif
2678   } else a->colmap = 0;
2679 
2680   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2681     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2682     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2683     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2684   } else a->garray = 0;
2685 
2686   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2687   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2688   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2689   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2690 
2691   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2692   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2693   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2694   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2695   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2696   *newmat = mat;
2697 
2698   PetscFunctionReturn(0);
2699 }
2700 
2701 #include "petscsys.h"
2702 
2703 #undef __FUNCT__
2704 #define __FUNCT__ "MatLoad_MPIBAIJ"
2705 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2706 {
2707   Mat            A;
2708   PetscErrorCode ierr;
2709   int            fd;
2710   PetscInt       i,nz,j,rstart,rend;
2711   PetscScalar    *vals,*buf;
2712   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2713   MPI_Status     status;
2714   PetscMPIInt    rank,size,maxnz;
2715   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2716   PetscInt       *locrowlens,*procsnz = 0,*browners;
2717   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2718   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2719   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2720   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2721 
2722   PetscFunctionBegin;
2723   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2724 
2725   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2726   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2727   if (!rank) {
2728     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2729     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2730     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2731   }
2732 
2733   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2734   M = header[1]; N = header[2];
2735 
2736   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2737 
2738   /*
2739      This code adds extra rows to make sure the number of rows is
2740      divisible by the blocksize
2741   */
2742   Mbs        = M/bs;
2743   extra_rows = bs - M + bs*Mbs;
2744   if (extra_rows == bs) extra_rows = 0;
2745   else                  Mbs++;
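  /* e.g. M=10, bs=3: Mbs=3, extra_rows=3-10+9=2 (!= bs), so Mbs becomes 4 and two padding rows are appended */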
2746   if (extra_rows && !rank) {
2747     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2748   }
2749 
2750   /* determine ownership of all rows */
2751   mbs        = Mbs/size + ((Mbs % size) > rank);
2752   m          = mbs*bs;
2753   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2754   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2755 
2756   /* process 0 needs enough room for process with most rows */
2757   if (!rank) {
2758     mmax = rowners[1];
2759     for (i=2; i<size; i++) {
2760       mmax = PetscMax(mmax,rowners[i]);
2761     }
2762     mmax*=bs;
2763   } else mmax = m;
2764 
2765   rowners[0] = 0;
2766   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2767   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2768   rstart = rowners[rank];
2769   rend   = rowners[rank+1];
2770 
2771   /* distribute row lengths to all processors */
2772   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2773   if (!rank) {
2774     mend = m;
2775     if (size == 1) mend = mend - extra_rows;
2776     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2777     for (j=mend; j<m; j++) locrowlens[j] = 1;
2778     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2779     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2780     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2781     for (j=0; j<m; j++) {
2782       procsnz[0] += locrowlens[j];
2783     }
2784     for (i=1; i<size; i++) {
2785       mend = browners[i+1] - browners[i];
2786       if (i == size-1) mend = mend - extra_rows;
2787       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2788       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2789       /* calculate the number of nonzeros on each processor */
2790       for (j=0; j<browners[i+1]-browners[i]; j++) {
2791         procsnz[i] += rowlengths[j];
2792       }
2793       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2794     }
2795     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2796   } else {
2797     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2798   }
2799 
2800   if (!rank) {
2801     /* determine max buffer needed and allocate it */
2802     maxnz = procsnz[0];
2803     for (i=1; i<size; i++) {
2804       maxnz = PetscMax(maxnz,procsnz[i]);
2805     }
2806     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2807 
2808     /* read in my part of the matrix column indices  */
2809     nz     = procsnz[0];
2810     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2811     mycols = ibuf;
2812     if (size == 1)  nz -= extra_rows;
2813     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2814     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2815 
2816     /* read in everyone else's part (except the last process's) and ship it off */
2817     for (i=1; i<size-1; i++) {
2818       nz   = procsnz[i];
2819       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2820       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2821     }
2822     /* read in the part for the last process */
2823     if (size != 1) {
2824       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2825       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2826       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2827       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2828     }
2829     ierr = PetscFree(cols);CHKERRQ(ierr);
2830   } else {
2831     /* determine buffer space needed for message */
2832     nz = 0;
2833     for (i=0; i<m; i++) {
2834       nz += locrowlens[i];
2835     }
2836     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2837     mycols = ibuf;
2838     /* receive the message of column indices */
2839     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2840     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2841     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"File is corrupt: received wrong number of column indices");
2842   }
2843 
2844   /* loop over local block rows, counting the distinct diagonal and off-diagonal block columns in each */
2845   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2846   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2847   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2848   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2849   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
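  /* mask[] is scratch over all Mbs block columns: within one block row it marks block
     columns already counted; masked1[] lists the distinct block columns that fall in the
     diagonal block [rstart,rend) and masked2[] the remaining (off-diagonal) ones, so the
     marks can be cleared cheaply after each block row */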
2850   rowcount = 0; nzcount = 0;
2851   for (i=0; i<mbs; i++) {
2852     dcount  = 0;
2853     odcount = 0;
2854     for (j=0; j<bs; j++) {
2855       kmax = locrowlens[rowcount];
2856       for (k=0; k<kmax; k++) {
2857         tmp = mycols[nzcount++]/bs;
2858         if (!mask[tmp]) {
2859           mask[tmp] = 1;
2860           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2861           else masked1[dcount++] = tmp;
2862         }
2863       }
2864       rowcount++;
2865     }
2866 
2867     dlens[i]  = dcount;
2868     odlens[i] = odcount;
2869 
2870     /* zero out the mask elements we set */
2871     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2872     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2873   }
2874 
2875   /* create our matrix */
2876   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2877   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2878   ierr = MatSetType(A,type);CHKERRQ(ierr);
2879   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2880 
2881   /* the file stores the column indices of each row in sorted order, so MatSetValues() may use its fast path */
2882   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2883 
2884   if (!rank) {
2885     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2886     /* read in my part of the matrix numerical values  */
2887     nz = procsnz[0];
2888     vals = buf;
2889     mycols = ibuf;
2890     if (size == 1)  nz -= extra_rows;
2891     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2892     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2893 
2894     /* insert into matrix; jj is a global point (non-blocked) row index, one row per call */
2895     jj      = rstart*bs;
2896     for (i=0; i<m; i++) {
2897       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2898       mycols += locrowlens[i];
2899       vals   += locrowlens[i];
2900       jj++;
2901     }
2902     /* read in the other processes' parts (except the last one's) and ship them out */
2903     for (i=1; i<size-1; i++) {
2904       nz   = procsnz[i];
2905       vals = buf;
2906       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2907       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2908     }
2909     /* the last process (the extra padded rows are not on disk) */
2910     if (size != 1){
2911       nz   = procsnz[size-1] - extra_rows;
2912       vals = buf;
2913       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2914       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2915       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2916     }
2917     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2918   } else {
2919     /* receive numeric values */
2920     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2921 
2922     /* receive the message of values */
2923     vals   = buf;
2924     mycols = ibuf;
2925     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2926     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2927     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"File is corrupt: received wrong number of matrix values");
2928 
2929     /* insert into matrix */
2930     jj      = rstart*bs;
2931     for (i=0; i<m; i++) {
2932       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2933       mycols += locrowlens[i];
2934       vals   += locrowlens[i];
2935       jj++;
2936     }
2937   }
2938   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2939   ierr = PetscFree(buf);CHKERRQ(ierr);
2940   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2941   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2942   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2943   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2944   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2945   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2946 
2947   *newmat = A;
2948   PetscFunctionReturn(0);
2949 }
2950 
2951 #undef __FUNCT__
2952 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2953 /*@
2954    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2955 
2956    Collective on Mat
2957 
2958    Input Parameters:
2959 +  mat  - the matrix
2960 -  fact - the multiplicative factor used in sizing the hash table
2961 
2962    Level: advanced
2963 
2964    Notes:
2965    This can also be set by the command line option: -mat_use_hash_table fact
2966 
2967 .keywords: matrix, hashtable, factor, HT
2968 
2969 .seealso: MatSetOption()
2970 @*/
2971 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2972 {
2973   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2974 
2975   PetscFunctionBegin;
2976   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2977   if (f) {
2978     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2979   }
2980   PetscFunctionReturn(0);
2981 }
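
/*
   A minimal usage sketch (illustrative only, kept out of the build): enable the
   hash-table based MatSetValues() path via MAT_USE_HASH_TABLE, then enlarge the
   table; the value 1.99 below is just an example, any factor above 1.0 will do.
*/
#if 0
static PetscErrorCode ExampleUseHashTable(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt M,Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* square MPIBAIJ matrix with no preallocation hints */
  ierr = MatCreateMPIBAIJ(comm,bs,m,m,M,M,0,PETSC_NULL,0,PETSC_NULL,mat);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetHashTableFactor(*mat,1.99);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif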
2982 
2983 EXTERN_C_BEGIN
2984 #undef __FUNCT__
2985 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2986 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2987 {
2988   Mat_MPIBAIJ *baij;
2989 
2990   PetscFunctionBegin;
2991   baij = (Mat_MPIBAIJ*)mat->data;
2992   baij->ht_fact = fact;
2993   PetscFunctionReturn(0);
2994 }
2995 EXTERN_C_END
2996 
2997 #undef __FUNCT__
2998 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
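/*
   MatMPIBAIJGetSeqBAIJ - Gives direct access to the internal "diagonal" (Ad) and
   "off-diagonal" (Ao) SeqBAIJ pieces of an MPIBAIJ matrix, together with the map
   (colmap) from the local block columns of Ao to global block column numbers.
   The objects returned are references into the matrix and must not be destroyed.
*/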
2999 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3000 {
3001   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3002   PetscFunctionBegin;
3003   *Ad     = a->A;
3004   *Ao     = a->B;
3005   *colmap = a->garray;
3006   PetscFunctionReturn(0);
3007 }
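
/*
   A usage sketch for the accessor above (illustrative only, kept out of the build):
*/
#if 0
  Mat      Ad,Ao;
  PetscInt *colmap;
  ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  /* Ad and Ao may now be inspected, e.g. with MatView(), but must not be destroyed */
#endif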
3008