xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e2ee2a47cae8248a2ea96b9d6f35ff3f37ecfc00)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16 
17 /*  UGLY, ugly, ugly
18    When MatScalar == PetscScalar the ..._MatScalar() functions below do not exist as
19    separate routines; they are simply #defined to the regular versions. Otherwise the
20    ..._MatScalar() functions take matrix elements already in single precision and insert
21    them into the single-precision data structures, while MatSetValues[Blocked]_MPIBAIJ()
22    first converts the entries to single precision and then calls ..._MatScalar().
23 */
24 #if defined(PETSC_USE_MAT_SINGLE)
25 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
30 #else
31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
32 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
34 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
36 #endif
37 
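/*
   MatGetRowMax_MPIBAIJ - for each locally owned row, the row maxima of the
   diagonal block (a->A) and the off-diagonal block (a->B) are computed
   separately and then merged entrywise, keeping whichever value has the
   larger absolute value.
*/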
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
40 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
41 {
42   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       i;
45   PetscScalar    *va,*vb;
46   Vec            vtmp;
47 
48   PetscFunctionBegin;
49 
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
53   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
57   for (i=0; i<A->rmap.n; i++){
58     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
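/*
   MatStoreValues_MPIBAIJ / MatRetrieveValues_MPIBAIJ - save and restore the
   numerical values of both sequential blocks; the nonzero pattern is left
   unchanged.
*/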
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNCT__
85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
89   PetscErrorCode ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
98 /*
99      Local utility routine that creates a mapping from the global column
100    number to the local number in the off-diagonal part of the local
101    storage of the matrix.  Without PETSC_USE_CTABLE this is done in a non-scalable
102    way since the length of colmap equals the number of global block columns.
103 */
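/*
   Illustrative example (not from the source): with block size bs = 2 and
   garray = {3,7,9}, the array branch below sets colmap[3] = 1, colmap[7] = 3,
   colmap[9] = 5 (i.e. i*bs+1); all other entries remain 0, which serves as the
   "block column not present locally" marker.
*/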
104 #undef __FUNCT__
105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
107 {
108   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
109   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
110   PetscErrorCode ierr;
111   PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;
112 
113   PetscFunctionBegin;
114 #if defined (PETSC_USE_CTABLE)
115   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
116   for (i=0; i<nbs; i++){
117     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
118   }
119 #else
120   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
121   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
123   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
124 #endif
125   PetscFunctionReturn(0);
126 }
127 
128 #define CHUNKSIZE  10
129 
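/*
   The two macros below insert (or add) a single scalar at local position (row,col)
   of the diagonal ("A") and off-diagonal ("B") sequential BAIJ blocks respectively.
   They locate the bs x bs block column with a short bisection followed by a linear
   scan of the block row and store the scalar at offset bs*cidx+ridx inside the
   (column-major) block; if the block does not yet exist and new nonzeros are
   allowed, MatSeqXAIJReallocateAIJ() grows the row and later blocks are shifted
   up to make room.
*/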
130 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
131 { \
132  \
133     brow = row/bs;  \
134     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135     rmax = aimax[brow]; nrow = ailen[brow]; \
136       bcol = col/bs; \
137       ridx = row % bs; cidx = col % bs; \
138       low = 0; high = nrow; \
139       while (high-low > 3) { \
140         t = (low+high)/2; \
141         if (rp[t] > bcol) high = t; \
142         else              low  = t; \
143       } \
144       for (_i=low; _i<high; _i++) { \
145         if (rp[_i] > bcol) break; \
146         if (rp[_i] == bcol) { \
147           bap  = ap +  bs2*_i + bs*cidx + ridx; \
148           if (addv == ADD_VALUES) *bap += value;  \
149           else                    *bap  = value;  \
150           goto a_noinsert; \
151         } \
152       } \
153       if (a->nonew == 1) goto a_noinsert; \
154       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
155       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
156       N = nrow++ - 1;  \
157       /* shift up all the later entries in this row */ \
158       for (ii=N; ii>=_i; ii--) { \
159         rp[ii+1] = rp[ii]; \
160         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
161       } \
162       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
163       rp[_i]                      = bcol;  \
164       ap[bs2*_i + bs*cidx + ridx] = value;  \
165       a_noinsert:; \
166     ailen[brow] = nrow; \
167 }
168 
169 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
170 { \
171     brow = row/bs;  \
172     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173     rmax = bimax[brow]; nrow = bilen[brow]; \
174       bcol = col/bs; \
175       ridx = row % bs; cidx = col % bs; \
176       low = 0; high = nrow; \
177       while (high-low > 3) { \
178         t = (low+high)/2; \
179         if (rp[t] > bcol) high = t; \
180         else              low  = t; \
181       } \
182       for (_i=low; _i<high; _i++) { \
183         if (rp[_i] > bcol) break; \
184         if (rp[_i] == bcol) { \
185           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186           if (addv == ADD_VALUES) *bap += value;  \
187           else                    *bap  = value;  \
188           goto b_noinsert; \
189         } \
190       } \
191       if (b->nonew == 1) goto b_noinsert; \
192       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
193       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
194       CHKMEMQ;\
195       N = nrow++ - 1;  \
196       /* shift up all the later entries in this row */ \
197       for (ii=N; ii>=_i; ii--) { \
198         rp[ii+1] = rp[ii]; \
199         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
200       } \
201       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
202       rp[_i]                      = bcol;  \
203       ap[bs2*_i + bs*cidx + ridx] = value;  \
204       b_noinsert:; \
205     bilen[brow] = nrow; \
206 }
207 
208 #if defined(PETSC_USE_MAT_SINGLE)
209 #undef __FUNCT__
210 #define __FUNCT__ "MatSetValues_MPIBAIJ"
211 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
212 {
213   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
214   PetscErrorCode ierr;
215   PetscInt       i,N = m*n;
216   MatScalar      *vsingle;
217 
218   PetscFunctionBegin;
219   if (N > b->setvalueslen) {
220     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
221     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
222     b->setvalueslen  = N;
223   }
224   vsingle = b->setvaluescopy;
225 
226   for (i=0; i<N; i++) {
227     vsingle[i] = v[i];
228   }
229   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
235 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
238   PetscErrorCode ierr;
239   PetscInt       i,N = m*n*b->bs2;
240   MatScalar      *vsingle;
241 
242   PetscFunctionBegin;
243   if (N > b->setvalueslen) {
244     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
245     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
246     b->setvalueslen  = N;
247   }
248   vsingle = b->setvaluescopy;
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
258 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
281 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
282 {
283   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
284   PetscErrorCode ierr;
285   PetscInt       i,N = m*n*b->bs2;
286   MatScalar      *vsingle;
287 
288   PetscFunctionBegin;
289   if (N > b->setvalueslen) {
290     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
291     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
292     b->setvalueslen  = N;
293   }
294   vsingle = b->setvaluescopy;
295   for (i=0; i<N; i++) {
296     vsingle[i] = v[i];
297   }
298   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 #endif
302 
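/*
   MatSetValues_MPIBAIJ_MatScalar - point-wise insertion. Entries whose row is owned
   locally go either into the diagonal block baij->A (columns inside the local column
   range) or into the off-diagonal block baij->B (global columns translated to local
   indices through colmap, which is created on demand once the matrix has been
   assembled). Entries destined for rows owned by other processes are placed in
   mat->stash and communicated during assembly.
*/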
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
305 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
306 {
307   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
308   MatScalar      value;
309   PetscTruth     roworiented = baij->roworiented;
310   PetscErrorCode ierr;
311   PetscInt       i,j,row,col;
312   PetscInt       rstart_orig=mat->rmap.rstart;
313   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
314   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
315 
316   /* Some Variables required in the macro */
317   Mat            A = baij->A;
318   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
319   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
320   MatScalar      *aa=a->a;
321 
322   Mat            B = baij->B;
323   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
324   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
325   MatScalar      *ba=b->a;
326 
327   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
328   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
329   MatScalar      *ap,*bap;
330 
331   PetscFunctionBegin;
332   for (i=0; i<m; i++) {
333     if (im[i] < 0) continue;
334 #if defined(PETSC_USE_DEBUG)
335     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
336 #endif
337     if (im[i] >= rstart_orig && im[i] < rend_orig) {
338       row = im[i] - rstart_orig;
339       for (j=0; j<n; j++) {
340         if (in[j] >= cstart_orig && in[j] < cend_orig){
341           col = in[j] - cstart_orig;
342           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
343           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
344           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
345         } else if (in[j] < 0) continue;
346 #if defined(PETSC_USE_DEBUG)
347         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
348 #endif
349         else {
350           if (mat->was_assembled) {
351             if (!baij->colmap) {
352               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
353             }
354 #if defined (PETSC_USE_CTABLE)
355             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
356             col  = col - 1;
357 #else
358             col = baij->colmap[in[j]/bs] - 1;
359 #endif
360             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
361               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
362               col =  in[j];
363               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
364               B = baij->B;
365               b = (Mat_SeqBAIJ*)(B)->data;
366               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
367               ba=b->a;
368             } else col += in[j]%bs;
369           } else col = in[j];
370           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
371           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
372           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
373         }
374       }
375     } else {
376       if (!baij->donotstash) {
377         if (roworiented) {
378           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
379         } else {
380           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
381         }
382       }
383     }
384   }
385   PetscFunctionReturn(0);
386 }
387 
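/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar - blocked insertion: each (im[i],in[j]) pair
   addresses a full bs x bs block. Unless the block is already contiguous in v it is
   first copied into the scratch buffer baij->barray and then handed to the sequential
   routines; blocks belonging to rows owned by other processes go into mat->bstash.

   A minimal, hypothetical usage sketch of the corresponding user-level call
   (error checking omitted; assumes bs = 2):

     PetscInt    row = 0,col = 0;          block row and block column indices
     PetscScalar vals[4] = {1,2,3,4};      one 2x2 block
     MatSetValuesBlocked(mat,1,&row,1,&col,vals,INSERT_VALUES);
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/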
388 #undef __FUNCT__
389 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
390 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
391 {
392   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
393   const MatScalar *value;
394   MatScalar       *barray=baij->barray;
395   PetscTruth      roworiented = baij->roworiented;
396   PetscErrorCode  ierr;
397   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
398   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
399   PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
400 
401   PetscFunctionBegin;
402   if(!barray) {
403     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
404     baij->barray = barray;
405   }
406 
407   if (roworiented) {
408     stepval = (n-1)*bs;
409   } else {
410     stepval = (m-1)*bs;
411   }
412   for (i=0; i<m; i++) {
413     if (im[i] < 0) continue;
414 #if defined(PETSC_USE_DEBUG)
415     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
416 #endif
417     if (im[i] >= rstart && im[i] < rend) {
418       row = im[i] - rstart;
419       for (j=0; j<n; j++) {
420         /* If there is a single block column (row-oriented) or a single block row (column-oriented), the block in v is already contiguous and no copy is required */
421         if ((roworiented) && (n == 1)) {
422           barray = (MatScalar*)v + i*bs2;
423         } else if((!roworiented) && (m == 1)) {
424           barray = (MatScalar*)v + j*bs2;
425         } else { /* Here a copy is required */
426           if (roworiented) {
427             value = v + i*(stepval+bs)*bs + j*bs;
428           } else {
429             value = v + j*(stepval+bs)*bs + i*bs;
430           }
431           for (ii=0; ii<bs; ii++,value+=stepval) {
432             for (jj=0; jj<bs; jj++) {
433               *barray++  = *value++;
434             }
435           }
436           barray -=bs2;
437         }
438 
439         if (in[j] >= cstart && in[j] < cend){
440           col  = in[j] - cstart;
441           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
442         }
443         else if (in[j] < 0) continue;
444 #if defined(PETSC_USE_DEBUG)
445         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
446 #endif
447         else {
448           if (mat->was_assembled) {
449             if (!baij->colmap) {
450               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
451             }
452 
453 #if defined(PETSC_USE_DEBUG)
454 #if defined (PETSC_USE_CTABLE)
455             { PetscInt data;
456               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
457               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
458             }
459 #else
460             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
461 #endif
462 #endif
463 #if defined (PETSC_USE_CTABLE)
464 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
465             col  = (col - 1)/bs;
466 #else
467             col = (baij->colmap[in[j]] - 1)/bs;
468 #endif
469             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
470               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
471               col =  in[j];
472             }
473           }
474           else col = in[j];
475           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
476         }
477       }
478     } else {
479       if (!baij->donotstash) {
480         if (roworiented) {
481           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
482         } else {
483           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
484         }
485       }
486     }
487   }
488   PetscFunctionReturn(0);
489 }
490 
491 #define HASH_KEY 0.6180339887
492 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
493 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
494 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
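/*
   The hash table maps the key (block_row*Nbs + block_col + 1) of every existing
   bs x bs block to the address of its values. HASH() is multiplicative hashing:
   0.6180339887 is the fractional part of the golden ratio; the key is multiplied
   by it, the fractional part of the product is taken and scaled by the table size.
   Collisions are resolved by linear probing, so the ..._HT routines below can only
   overwrite or add to blocks that already exist in the matrix.
*/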
495 #undef __FUNCT__
496 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
497 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
498 {
499   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
500   PetscTruth     roworiented = baij->roworiented;
501   PetscErrorCode ierr;
502   PetscInt       i,j,row,col;
503   PetscInt       rstart_orig=mat->rmap.rstart;
504   PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
505   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
506   PetscReal      tmp;
507   MatScalar      **HD = baij->hd,value;
508 #if defined(PETSC_USE_DEBUG)
509   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
510 #endif
511 
512   PetscFunctionBegin;
513 
514   for (i=0; i<m; i++) {
515 #if defined(PETSC_USE_DEBUG)
516     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
517     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
518 #endif
519       row = im[i];
520     if (row >= rstart_orig && row < rend_orig) {
521       for (j=0; j<n; j++) {
522         col = in[j];
523         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
524         /* Look up into the hash table */
525         key = (row/bs)*Nbs+(col/bs)+1;
526         h1  = HASH(size,key,tmp);
527 
528 
529         idx = h1;
530 #if defined(PETSC_USE_DEBUG)
531         insert_ct++;
532         total_ct++;
533         if (HT[idx] != key) {
534           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
535           if (idx == size) {
536             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
537             if (idx == h1) {
538               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
539             }
540           }
541         }
542 #else
543         if (HT[idx] != key) {
544           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
545           if (idx == size) {
546             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
547             if (idx == h1) {
548               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
549             }
550           }
551         }
552 #endif
553         /* A HASH table entry is found, so insert the values at the correct address */
554         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
555         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
556       }
557     } else {
558       if (!baij->donotstash) {
559         if (roworiented) {
560           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
561         } else {
562           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
563         }
564       }
565     }
566   }
567 #if defined(PETSC_USE_DEBUG)
568   baij->ht_total_ct = total_ct;
569   baij->ht_insert_ct = insert_ct;
570 #endif
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
576 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
577 {
578   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
579   PetscTruth      roworiented = baij->roworiented;
580   PetscErrorCode  ierr;
581   PetscInt        i,j,ii,jj,row,col;
582   PetscInt        rstart=baij->rstartbs;
583   PetscInt        rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2;
584   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
585   PetscReal       tmp;
586   MatScalar       **HD = baij->hd,*baij_a;
587   const MatScalar *v_t,*value;
588 #if defined(PETSC_USE_DEBUG)
589   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
590 #endif
591 
592   PetscFunctionBegin;
593 
594   if (roworiented) {
595     stepval = (n-1)*bs;
596   } else {
597     stepval = (m-1)*bs;
598   }
599   for (i=0; i<m; i++) {
600 #if defined(PETSC_USE_DEBUG)
601     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
602     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
603 #endif
604     row   = im[i];
605     v_t   = v + i*bs2;
606     if (row >= rstart && row < rend) {
607       for (j=0; j<n; j++) {
608         col = in[j];
609 
610         /* Look up into the Hash Table */
611         key = row*Nbs+col+1;
612         h1  = HASH(size,key,tmp);
613 
614         idx = h1;
615 #if defined(PETSC_USE_DEBUG)
616         total_ct++;
617         insert_ct++;
618        if (HT[idx] != key) {
619           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
620           if (idx == size) {
621             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
622             if (idx == h1) {
623               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
624             }
625           }
626         }
627 #else
628         if (HT[idx] != key) {
629           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
630           if (idx == size) {
631             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
632             if (idx == h1) {
633               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
634             }
635           }
636         }
637 #endif
638         baij_a = HD[idx];
639         if (roworiented) {
640           /*value = v + i*(stepval+bs)*bs + j*bs;*/
641           /* value = v + (i*(stepval+bs)+j)*bs; */
642           value = v_t;
643           v_t  += bs;
644           if (addv == ADD_VALUES) {
645             for (ii=0; ii<bs; ii++,value+=stepval) {
646               for (jj=ii; jj<bs2; jj+=bs) {
647                 baij_a[jj]  += *value++;
648               }
649             }
650           } else {
651             for (ii=0; ii<bs; ii++,value+=stepval) {
652               for (jj=ii; jj<bs2; jj+=bs) {
653                 baij_a[jj]  = *value++;
654               }
655             }
656           }
657         } else {
658           value = v + j*(stepval+bs)*bs + i*bs;
659           if (addv == ADD_VALUES) {
660             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
661               for (jj=0; jj<bs; jj++) {
662                 baij_a[jj]  += *value++;
663               }
664             }
665           } else {
666             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
667               for (jj=0; jj<bs; jj++) {
668                 baij_a[jj]  = *value++;
669               }
670             }
671           }
672         }
673       }
674     } else {
675       if (!baij->donotstash) {
676         if (roworiented) {
677           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
678         } else {
679           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
680         }
681       }
682     }
683   }
684 #if defined(PETSC_USE_DEBUG)
685   baij->ht_total_ct = total_ct;
686   baij->ht_insert_ct = insert_ct;
687 #endif
688   PetscFunctionReturn(0);
689 }
690 
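/*
   MatGetValues_MPIBAIJ - only values in locally owned rows may be retrieved.
   Columns inside the local column range are read from the diagonal block; other
   columns are translated through colmap and read from the off-diagonal block,
   with 0.0 returned for entries that are not stored locally.
*/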
691 #undef __FUNCT__
692 #define __FUNCT__ "MatGetValues_MPIBAIJ"
693 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
694 {
695   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
696   PetscErrorCode ierr;
697   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
698   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
699 
700   PetscFunctionBegin;
701   for (i=0; i<m; i++) {
702     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
703     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
704     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
705       row = idxm[i] - bsrstart;
706       for (j=0; j<n; j++) {
707         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
708         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
709         if (idxn[j] >= bscstart && idxn[j] < bscend){
710           col = idxn[j] - bscstart;
711           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
712         } else {
713           if (!baij->colmap) {
714             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
715           }
716 #if defined (PETSC_USE_CTABLE)
717           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
718           data --;
719 #else
720           data = baij->colmap[idxn[j]/bs]-1;
721 #endif
722           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
723           else {
724             col  = data + idxn[j]%bs;
725             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
726           }
727         }
728       }
729     } else {
730       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
731     }
732   }
733  PetscFunctionReturn(0);
734 }
735 
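/*
   MatNorm_MPIBAIJ - on a single process this delegates to the sequential block.
   NORM_FROBENIUS: sum the squares of all local entries, MPI_SUM reduce, take the
   square root.  NORM_1 (max column sum): accumulate the absolute column sums of
   both local blocks into an array of global length, MPI_SUM reduce, take the
   maximum.  NORM_INFINITY (max row sum): accumulate the absolute row sums block
   row by block row, take the local maximum and MPI_MAX reduce.
*/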
736 #undef __FUNCT__
737 #define __FUNCT__ "MatNorm_MPIBAIJ"
738 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
739 {
740   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
741   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
742   PetscErrorCode ierr;
743   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
744   PetscReal      sum = 0.0;
745   MatScalar      *v;
746 
747   PetscFunctionBegin;
748   if (baij->size == 1) {
749     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
750   } else {
751     if (type == NORM_FROBENIUS) {
752       v = amat->a;
753       nz = amat->nz*bs2;
754       for (i=0; i<nz; i++) {
755 #if defined(PETSC_USE_COMPLEX)
756         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
757 #else
758         sum += (*v)*(*v); v++;
759 #endif
760       }
761       v = bmat->a;
762       nz = bmat->nz*bs2;
763       for (i=0; i<nz; i++) {
764 #if defined(PETSC_USE_COMPLEX)
765         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
766 #else
767         sum += (*v)*(*v); v++;
768 #endif
769       }
770       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
771       *nrm = sqrt(*nrm);
772     } else if (type == NORM_1) { /* max column sum */
773       PetscReal *tmp,*tmp2;
774       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
775       ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
776       tmp2 = tmp + mat->cmap.N;
777       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
778       v = amat->a; jj = amat->j;
779       for (i=0; i<amat->nz; i++) {
780         for (j=0; j<bs; j++){
781           col = bs*(cstart + *jj) + j; /* column index */
782           for (row=0; row<bs; row++){
783             tmp[col] += PetscAbsScalar(*v);  v++;
784           }
785         }
786         jj++;
787       }
788       v = bmat->a; jj = bmat->j;
789       for (i=0; i<bmat->nz; i++) {
790         for (j=0; j<bs; j++){
791           col = bs*garray[*jj] + j;
792           for (row=0; row<bs; row++){
793             tmp[col] += PetscAbsScalar(*v); v++;
794           }
795         }
796         jj++;
797       }
798       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
799       *nrm = 0.0;
800       for (j=0; j<mat->cmap.N; j++) {
801         if (tmp2[j] > *nrm) *nrm = tmp2[j];
802       }
803       ierr = PetscFree(tmp);CHKERRQ(ierr);
804     } else if (type == NORM_INFINITY) { /* max row sum */
805       PetscReal *sums;
806       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
807       sum = 0.0;
808       for (j=0; j<amat->mbs; j++) {
809         for (row=0; row<bs; row++) sums[row] = 0.0;
810         v = amat->a + bs2*amat->i[j];
811         nz = amat->i[j+1]-amat->i[j];
812         for (i=0; i<nz; i++) {
813           for (col=0; col<bs; col++){
814             for (row=0; row<bs; row++){
815               sums[row] += PetscAbsScalar(*v); v++;
816             }
817           }
818         }
819         v = bmat->a + bs2*bmat->i[j];
820         nz = bmat->i[j+1]-bmat->i[j];
821         for (i=0; i<nz; i++) {
822           for (col=0; col<bs; col++){
823             for (row=0; row<bs; row++){
824               sums[row] += PetscAbsScalar(*v); v++;
825             }
826           }
827         }
828         for (row=0; row<bs; row++){
829           if (sums[row] > sum) sum = sums[row];
830         }
831       }
832       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
833       ierr = PetscFree(sums);CHKERRQ(ierr);
834     } else {
835       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
836     }
837   }
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   Creates the hash table and fills it in.
843   This table is created only once.
844   If new entries need to be added to the matrix
845   then the hash table has to be destroyed and
846   recreated.
847 */
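/*
   Layout used below: a single allocation holds ht_size MatScalar* slots (baij->hd,
   the addresses of the stored blocks) followed by ht_size PetscInt keys (baij->ht).
   Every block of A and B is inserted by linear probing starting from
   HASH(size,key,tmp); an empty slot is marked by a zero key, which is why the
   stored key is row*Nbs + col + 1 rather than row*Nbs + col.
*/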
848 #undef __FUNCT__
849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
850 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
851 {
852   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
853   Mat            A = baij->A,B=baij->B;
854   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
855   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
856   PetscErrorCode ierr;
857   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
858   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
859   PetscInt       *HT,key;
860   MatScalar      **HD;
861   PetscReal      tmp;
862 #if defined(PETSC_USE_INFO)
863   PetscInt       ct=0,max=0;
864 #endif
865 
866   PetscFunctionBegin;
867   baij->ht_size=(PetscInt)(factor*nz);
868   size = baij->ht_size;
869 
870   if (baij->ht) {
871     PetscFunctionReturn(0);
872   }
873 
874   /* Allocate Memory for Hash Table */
875   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
876   baij->ht = (PetscInt*)(baij->hd + size);
877   HD       = baij->hd;
878   HT       = baij->ht;
879 
880 
881   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
882 
883 
884   /* Loop Over A */
885   for (i=0; i<a->mbs; i++) {
886     for (j=ai[i]; j<ai[i+1]; j++) {
887       row = i+rstart;
888       col = aj[j]+cstart;
889 
890       key = row*Nbs + col + 1;
891       h1  = HASH(size,key,tmp);
892       for (k=0; k<size; k++){
893         if (!HT[(h1+k)%size]) {
894           HT[(h1+k)%size] = key;
895           HD[(h1+k)%size] = a->a + j*bs2;
896           break;
897 #if defined(PETSC_USE_INFO)
898         } else {
899           ct++;
900 #endif
901         }
902       }
903 #if defined(PETSC_USE_INFO)
904       if (k> max) max = k;
905 #endif
906     }
907   }
908   /* Loop Over B */
909   for (i=0; i<b->mbs; i++) {
910     for (j=bi[i]; j<bi[i+1]; j++) {
911       row = i+rstart;
912       col = garray[bj[j]];
913       key = row*Nbs + col + 1;
914       h1  = HASH(size,key,tmp);
915       for (k=0; k<size; k++){
916         if (!HT[(h1+k)%size]) {
917           HT[(h1+k)%size] = key;
918           HD[(h1+k)%size] = b->a + j*bs2;
919           break;
920 #if defined(PETSC_USE_INFO)
921         } else {
922           ct++;
923 #endif
924         }
925       }
926 #if defined(PETSC_USE_INFO)
927       if (k> max) max = k;
928 #endif
929     }
930   }
931 
932   /* Print Summary */
933 #if defined(PETSC_USE_INFO)
934   for (i=0,j=0; i<size; i++) {
935     if (HT[i]) {j++;}
936   }
937   ierr = PetscInfo2(0,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
938 #endif
939   PetscFunctionReturn(0);
940 }
941 
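/*
   MatAssemblyBegin_MPIBAIJ - verifies that all processes are using the same
   InsertMode and starts sending the point stash and the block stash to the
   processes that own the corresponding rows; the matching receives are drained
   in MatAssemblyEnd_MPIBAIJ().
*/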
942 #undef __FUNCT__
943 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
944 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
945 {
946   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
947   PetscErrorCode ierr;
948   PetscInt       nstash,reallocs;
949   InsertMode     addv;
950 
951   PetscFunctionBegin;
952   if (baij->donotstash) {
953     PetscFunctionReturn(0);
954   }
955 
956   /* make sure all processors are either in INSERTMODE or ADDMODE */
957   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
958   if (addv == (ADD_VALUES|INSERT_VALUES)) {
959     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
960   }
961   mat->insertmode = addv; /* in case this processor had no cache */
962 
963   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
964   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
965   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
966   ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
967   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
968   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
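/*
   MatAssemblyEnd_MPIBAIJ - drains the point stash and then the block stash received
   from other processes (the block stash is stored column-oriented, so the roworiented
   flags are switched off temporarily), assembles the two sequential blocks, sets up
   the scatter for ghost values on the first final assembly and, if ht_flag is set,
   builds the hash table and switches the MatSetValues kernels to the _HT variants.
*/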
972 #undef __FUNCT__
973 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
974 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
975 {
976   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
977   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
978   PetscErrorCode ierr;
979   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
980   PetscInt       *row,*col,other_disassembled;
981   PetscTruth     r1,r2,r3;
982   MatScalar      *val;
983   InsertMode     addv = mat->insertmode;
984   PetscMPIInt    n;
985 
986   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
987   PetscFunctionBegin;
988   if (!baij->donotstash) {
989     while (1) {
990       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
991       if (!flg) break;
992 
993       for (i=0; i<n;) {
994         /* Now identify the consecutive vals belonging to the same row */
995         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
996         if (j < n) ncols = j-i;
997         else       ncols = n-i;
998         /* Now assemble all these values with a single function call */
999         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1000         i = j;
1001       }
1002     }
1003     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1004     /* Now process the block-stash. Since the values are stashed column-oriented,
1005        set the roworiented flag to column oriented, and after MatSetValues()
1006        restore the original flags */
1007     r1 = baij->roworiented;
1008     r2 = a->roworiented;
1009     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
1010     baij->roworiented = PETSC_FALSE;
1011     a->roworiented    = PETSC_FALSE;
1012     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
1013     while (1) {
1014       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1015       if (!flg) break;
1016 
1017       for (i=0; i<n;) {
1018         /* Now identify the consecutive vals belonging to the same row */
1019         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1020         if (j < n) ncols = j-i;
1021         else       ncols = n-i;
1022         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1023         i = j;
1024       }
1025     }
1026     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1027     baij->roworiented = r1;
1028     a->roworiented    = r2;
1029     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
1030   }
1031 
1032   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1033   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1034 
1035   /* determine if any processor has disassembled, if so we must
1036      also disassemble ourselves, in order that we may reassemble. */
1037   /*
1038      if nonzero structure of submatrix B cannot change then we know that
1039      no processor disassembled thus we can skip this stuff
1040   */
1041   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1042     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1043     if (mat->was_assembled && !other_disassembled) {
1044       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1045     }
1046   }
1047 
1048   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1049     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1050   }
1051   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
1052   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1053   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1054 
1055 #if defined(PETSC_USE_INFO)
1056   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1057     ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
1058     baij->ht_total_ct  = 0;
1059     baij->ht_insert_ct = 0;
1060   }
1061 #endif
1062   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1063     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1064     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1065     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1066   }
1067 
1068   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1069   baij->rowvalues = 0;
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1075 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1076 {
1077   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1078   PetscErrorCode    ierr;
1079   PetscMPIInt       size = baij->size,rank = baij->rank;
1080   PetscInt          bs = mat->rmap.bs;
1081   PetscTruth        iascii,isdraw;
1082   PetscViewer       sviewer;
1083   PetscViewerFormat format;
1084 
1085   PetscFunctionBegin;
1086   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1087   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1088   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1089   if (iascii) {
1090     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1091     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1092       MatInfo info;
1093       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1094       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1095       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1096               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1097               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
1098       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1099       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1100       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1101       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1102       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1103       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1104       PetscFunctionReturn(0);
1105     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1106       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1107       PetscFunctionReturn(0);
1108     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1109       PetscFunctionReturn(0);
1110     }
1111   }
1112 
1113   if (isdraw) {
1114     PetscDraw       draw;
1115     PetscTruth isnull;
1116     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1117     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1118   }
1119 
1120   if (size == 1) {
1121     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1122     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1123   } else {
1124     /* assemble the entire matrix onto first processor. */
1125     Mat         A;
1126     Mat_SeqBAIJ *Aloc;
1127     PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1128     MatScalar   *a;
1129 
1130     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1131     /* Perhaps this should be the type of mat? */
1132     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
1133     if (!rank) {
1134       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1135     } else {
1136       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1137     }
1138     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1139     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1140     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1141 
1142     /* copy over the A part */
1143     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1144     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1145     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1146 
1147     for (i=0; i<mbs; i++) {
1148       rvals[0] = bs*(baij->rstartbs + i);
1149       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1150       for (j=ai[i]; j<ai[i+1]; j++) {
1151         col = (baij->cstartbs+aj[j])*bs;
1152         for (k=0; k<bs; k++) {
1153           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1154           col++; a += bs;
1155         }
1156       }
1157     }
1158     /* copy over the B part */
1159     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1160     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1161     for (i=0; i<mbs; i++) {
1162       rvals[0] = bs*(baij->rstartbs + i);
1163       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1164       for (j=ai[i]; j<ai[i+1]; j++) {
1165         col = baij->garray[aj[j]]*bs;
1166         for (k=0; k<bs; k++) {
1167           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1168           col++; a += bs;
1169         }
1170       }
1171     }
1172     ierr = PetscFree(rvals);CHKERRQ(ierr);
1173     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1174     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1175     /*
1176        Everyone has to call to draw the matrix since the graphics waits are
1177        synchronized across all processors that share the PetscDraw object
1178     */
1179     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1180     if (!rank) {
1181       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1182       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1183     }
1184     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1185     ierr = MatDestroy(A);CHKERRQ(ierr);
1186   }
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatView_MPIBAIJ"
1192 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1193 {
1194   PetscErrorCode ierr;
1195   PetscTruth     iascii,isdraw,issocket,isbinary;
1196 
1197   PetscFunctionBegin;
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1200   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1201   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1202   if (iascii || isdraw || issocket || isbinary) {
1203     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1204   } else {
1205     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1206   }
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1212 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1213 {
1214   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1215   PetscErrorCode ierr;
1216 
1217   PetscFunctionBegin;
1218 #if defined(PETSC_USE_LOG)
1219   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
1220 #endif
1221   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1222   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1223   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1224   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1225 #if defined (PETSC_USE_CTABLE)
1226   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1227 #else
1228   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1229 #endif
1230   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1231   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1232   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1233   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1234   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1235   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1236 #if defined(PETSC_USE_MAT_SINGLE)
1237   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1238 #endif
1239   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1240   ierr = PetscFree(baij);CHKERRQ(ierr);
1241 
1242   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
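/*
   MatMult_MPIBAIJ - computes yy = A*xx as yy = A_d*xx + B_o*lvec, where A_d is the
   local diagonal block, B_o the off-diagonal block and lvec holds the scattered
   ghost values of xx; the scatter is started before the diagonal multiply so that
   communication overlaps with local computation.
*/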
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatMult_MPIBAIJ"
1254 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1255 {
1256   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1257   PetscErrorCode ierr;
1258   PetscInt       nt;
1259 
1260   PetscFunctionBegin;
1261   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1262   if (nt != A->cmap.n) {
1263     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1264   }
1265   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1266   if (nt != A->rmap.n) {
1267     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1268   }
1269   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1270   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1271   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1272   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1278 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1279 {
1280   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282 
1283   PetscFunctionBegin;
1284   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1285   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1286   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1287   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1293 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1294 {
1295   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1296   PetscErrorCode ierr;
1297   PetscTruth     merged;
1298 
1299   PetscFunctionBegin;
1300   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1301   /* do nondiagonal part */
1302   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1303   if (!merged) {
1304     /* send it on its way */
1305     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1306     /* do local part */
1307     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1308     /* receive remote parts: note this assumes the values are not actually */
1309     /* inserted in yy until the next line */
1310     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1311   } else {
1312     /* do local part */
1313     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1314     /* send it on its way */
1315     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1316     /* values actually were received in the Begin() but we need to call this nop */
1317     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1324 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1325 {
1326   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1327   PetscErrorCode ierr;
1328 
1329   PetscFunctionBegin;
1330   /* do nondiagonal part */
1331   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1332   /* send it on its way */
1333   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1334   /* do local part */
1335   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1336   /* receive remote parts: note this assumes the values are not actually */
1337   /* inserted in zz until the next line, which is true for my implementation */
1338   /* but is perhaps not always true. */
1339   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 /*
1344   This only works correctly for square matrices where the subblock A->A is the
1345    diagonal block
1346 */
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1349 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1350 {
1351   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1352   PetscErrorCode ierr;
1353 
1354   PetscFunctionBegin;
1355   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1356   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatScale_MPIBAIJ"
1362 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1363 {
1364   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1365   PetscErrorCode ierr;
1366 
1367   PetscFunctionBegin;
1368   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1369   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
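/*
   MatGetRow_MPIBAIJ - returns one locally owned global row by calling the getrow
   routines of the sequential diagonal and off-diagonal blocks and merging the two
   results in increasing global column order: off-diagonal columns that lie before
   the diagonal column range come first, then the diagonal part, then the remaining
   off-diagonal columns.  Work space large enough for the longest local row is
   allocated on first use and kept in rowvalues/rowindices.
*/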
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1375 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1376 {
1377   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1378   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1379   PetscErrorCode ierr;
1380   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1381   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1382   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1383 
1384   PetscFunctionBegin;
1385   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1386   mat->getrowactive = PETSC_TRUE;
1387 
1388   if (!mat->rowvalues && (idx || v)) {
1389     /*
1390         allocate enough space to hold information from the longest row.
1391     */
1392     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1393     PetscInt     max = 1,mbs = mat->mbs,tmp;
1394     for (i=0; i<mbs; i++) {
1395       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1396       if (max < tmp) { max = tmp; }
1397     }
1398     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1399     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1400   }
1401 
1402   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1403   lrow = row - brstart;
1404 
1405   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1406   if (!v)   {pvA = 0; pvB = 0;}
1407   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1408   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1409   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1410   nztot = nzA + nzB;
1411 
1412   cmap  = mat->garray;
1413   if (v  || idx) {
1414     if (nztot) {
1415       /* Sort by increasing column numbers, assuming A and B already sorted */
1416       PetscInt imark = -1;
1417       if (v) {
1418         *v = v_p = mat->rowvalues;
1419         for (i=0; i<nzB; i++) {
1420           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1421           else break;
1422         }
1423         imark = i;
1424         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1425         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1426       }
1427       if (idx) {
1428         *idx = idx_p = mat->rowindices;
1429         if (imark > -1) {
1430           for (i=0; i<imark; i++) {
1431             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1432           }
1433         } else {
1434           for (i=0; i<nzB; i++) {
1435             if (cmap[cworkB[i]/bs] < cstart)
1436               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1437             else break;
1438           }
1439           imark = i;
1440         }
1441         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1442         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1443       }
1444     } else {
1445       if (idx) *idx = 0;
1446       if (v)   *v   = 0;
1447     }
1448   }
1449   *nz = nztot;
1450   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1451   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1452   PetscFunctionReturn(0);
1453 }
1454 
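/*
   Illustrative sketch only (assumed caller code, not part of this file): MatGetRow() on an
   MPIBAIJ matrix may only be asked for locally owned rows and every call must be paired
   with MatRestoreRow().  The names A, rstart, rend, row, ncols, cols and vals are placeholders.

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... use the ncols global column indices in cols[] and the entries in vals[] ...
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/
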
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1457 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1458 {
1459   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1460 
1461   PetscFunctionBegin;
1462   if (!baij->getrowactive) {
1463     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1464   }
1465   baij->getrowactive = PETSC_FALSE;
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1471 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1472 {
1473   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1474   PetscErrorCode ierr;
1475 
1476   PetscFunctionBegin;
1477   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1478   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1484 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1485 {
1486   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1487   Mat            A = a->A,B = a->B;
1488   PetscErrorCode ierr;
1489   PetscReal      isend[5],irecv[5];
1490 
1491   PetscFunctionBegin;
1492   info->block_size     = (PetscReal)matin->rmap.bs;
1493   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1494   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1495   isend[3] = info->memory;  isend[4] = info->mallocs;
1496   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1497   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1498   isend[3] += info->memory;  isend[4] += info->mallocs;
1499   if (flag == MAT_LOCAL) {
1500     info->nz_used      = isend[0];
1501     info->nz_allocated = isend[1];
1502     info->nz_unneeded  = isend[2];
1503     info->memory       = isend[3];
1504     info->mallocs      = isend[4];
1505   } else if (flag == MAT_GLOBAL_MAX) {
1506     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1507     info->nz_used      = irecv[0];
1508     info->nz_allocated = irecv[1];
1509     info->nz_unneeded  = irecv[2];
1510     info->memory       = irecv[3];
1511     info->mallocs      = irecv[4];
1512   } else if (flag == MAT_GLOBAL_SUM) {
1513     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1514     info->nz_used      = irecv[0];
1515     info->nz_allocated = irecv[1];
1516     info->nz_unneeded  = irecv[2];
1517     info->memory       = irecv[3];
1518     info->mallocs      = irecv[4];
1519   } else {
1520     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1521   }
1522   info->rows_global       = (PetscReal)A->rmap.N;
1523   info->columns_global    = (PetscReal)A->cmap.N;
1524   info->rows_local        = (PetscReal)A->rmap.N;
1525   info->columns_local     = (PetscReal)A->cmap.N;
1526   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1527   info->fill_ratio_needed = 0;
1528   info->factor_mallocs    = 0;
1529   PetscFunctionReturn(0);
1530 }
1531 
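/*
   Illustrative sketch only (assumed caller code, not part of this file): querying the
   statistics assembled above.  MAT_LOCAL reports this process only, while MAT_GLOBAL_SUM
   and MAT_GLOBAL_MAX reduce over the communicator.

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(A->comm,"nonzeros used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
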
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1534 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1535 {
1536   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   switch (op) {
1541   case MAT_NO_NEW_NONZERO_LOCATIONS:
1542   case MAT_YES_NEW_NONZERO_LOCATIONS:
1543   case MAT_COLUMNS_UNSORTED:
1544   case MAT_COLUMNS_SORTED:
1545   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1546   case MAT_KEEP_ZEROED_ROWS:
1547   case MAT_NEW_NONZERO_LOCATION_ERR:
1548     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1549     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1550     break;
1551   case MAT_ROW_ORIENTED:
1552     a->roworiented = PETSC_TRUE;
1553     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1554     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1555     break;
1556   case MAT_ROWS_SORTED:
1557   case MAT_ROWS_UNSORTED:
1558   case MAT_YES_NEW_DIAGONALS:
1559     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1560     break;
1561   case MAT_COLUMN_ORIENTED:
1562     a->roworiented = PETSC_FALSE;
1563     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1564     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1565     break;
1566   case MAT_IGNORE_OFF_PROC_ENTRIES:
1567     a->donotstash = PETSC_TRUE;
1568     break;
1569   case MAT_NO_NEW_DIAGONALS:
1570     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1571   case MAT_USE_HASH_TABLE:
1572     a->ht_flag = PETSC_TRUE;
1573     break;
1574   case MAT_SYMMETRIC:
1575   case MAT_STRUCTURALLY_SYMMETRIC:
1576   case MAT_HERMITIAN:
1577   case MAT_SYMMETRY_ETERNAL:
1578     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1579     break;
1580   case MAT_NOT_SYMMETRIC:
1581   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1582   case MAT_NOT_HERMITIAN:
1583   case MAT_NOT_SYMMETRY_ETERNAL:
1584     break;
1585   default:
1586     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
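/*
   Illustrative sketch only (assumed caller code, not part of this file): the options handled
   above are set through the public MatSetOption() interface, for example to enable the
   hash-table assembly path or to forbid new nonzero locations after the first assembly.

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
*/
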
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1593 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1594 {
1595   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1596   Mat_SeqBAIJ    *Aloc;
1597   Mat            B;
1598   PetscErrorCode ierr;
1599   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1600   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1601   MatScalar      *a;
1602 
1603   PetscFunctionBegin;
1604   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1605   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1606   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1607   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1608   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1609 
1610   /* copy over the A part */
1611   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1612   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1613   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1614 
1615   for (i=0; i<mbs; i++) {
1616     rvals[0] = bs*(baij->rstartbs + i);
1617     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1618     for (j=ai[i]; j<ai[i+1]; j++) {
1619       col = (baij->cstartbs+aj[j])*bs;
1620       for (k=0; k<bs; k++) {
1621         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1622         col++; a += bs;
1623       }
1624     }
1625   }
1626   /* copy over the B part */
1627   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1628   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1629   for (i=0; i<mbs; i++) {
1630     rvals[0] = bs*(baij->rstartbs + i);
1631     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1632     for (j=ai[i]; j<ai[i+1]; j++) {
1633       col = baij->garray[aj[j]]*bs;
1634       for (k=0; k<bs; k++) {
1635         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1636         col++; a += bs;
1637       }
1638     }
1639   }
1640   ierr = PetscFree(rvals);CHKERRQ(ierr);
1641   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643 
1644   if (matout) {
1645     *matout = B;
1646   } else {
1647     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1648   }
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1654 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1655 {
1656   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1657   Mat            a = baij->A,b = baij->B;
1658   PetscErrorCode ierr;
1659   PetscInt       s1,s2,s3;
1660 
1661   PetscFunctionBegin;
1662   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1663   if (rr) {
1664     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1665     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1666     /* Overlap communication with computation. */
1667     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1668   }
1669   if (ll) {
1670     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1671     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1672     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1673   }
1674   /* scale  the diagonal block */
1675   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1676 
1677   if (rr) {
1678     /* Do a scatter end and then right scale the off-diagonal block */
1679     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1680     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1681   }
1682 
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1688 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1689 {
1690   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1691   PetscErrorCode ierr;
1692   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1693   PetscInt       i,*owners = A->rmap.range;
1694   PetscInt       *nprocs,j,idx,nsends,row;
1695   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1696   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1697   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1698   MPI_Comm       comm = A->comm;
1699   MPI_Request    *send_waits,*recv_waits;
1700   MPI_Status     recv_status,*send_status;
1701 #if defined(PETSC_DEBUG)
1702   PetscTruth     found = PETSC_FALSE;
1703 #endif
1704 
1705   PetscFunctionBegin;
1706   /*  first count number of contributors to each processor */
1707   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1708   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1709   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1710   j = 0;
1711   for (i=0; i<N; i++) {
1712     if (lastidx > (idx = rows[i])) j = 0;
1713     lastidx = idx;
1714     for (; j<size; j++) {
1715       if (idx >= owners[j] && idx < owners[j+1]) {
1716         nprocs[2*j]++;
1717         nprocs[2*j+1] = 1;
1718         owner[i] = j;
1719 #if defined(PETSC_DEBUG)
1720         found = PETSC_TRUE;
1721 #endif
1722         break;
1723       }
1724     }
1725 #if defined(PETSC_DEBUG)
1726     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1727     found = PETSC_FALSE;
1728 #endif
1729   }
1730   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1731 
1732   /* inform other processors of number of messages and max length*/
1733   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1734 
1735   /* post receives:   */
1736   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1737   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1738   for (i=0; i<nrecvs; i++) {
1739     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1740   }
1741 
1742   /* do sends:
1743      1) starts[i] gives the starting index in svalues for stuff going to
1744      the ith processor
1745   */
1746   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1747   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1748   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1749   starts[0]  = 0;
1750   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1751   for (i=0; i<N; i++) {
1752     svalues[starts[owner[i]]++] = rows[i];
1753   }
1754 
1755   starts[0] = 0;
1756   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1757   count = 0;
1758   for (i=0; i<size; i++) {
1759     if (nprocs[2*i+1]) {
1760       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1761     }
1762   }
1763   ierr = PetscFree(starts);CHKERRQ(ierr);
1764 
1765   base = owners[rank];
1766 
1767   /*  wait on receives */
1768   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1769   source = lens + nrecvs;
1770   count  = nrecvs; slen = 0;
1771   while (count) {
1772     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1773     /* unpack receives into our local space */
1774     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1775     source[imdex]  = recv_status.MPI_SOURCE;
1776     lens[imdex]    = n;
1777     slen          += n;
1778     count--;
1779   }
1780   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1781 
1782   /* move the data into the send scatter */
1783   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1784   count = 0;
1785   for (i=0; i<nrecvs; i++) {
1786     values = rvalues + i*nmax;
1787     for (j=0; j<lens[i]; j++) {
1788       lrows[count++] = values[j] - base;
1789     }
1790   }
1791   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1792   ierr = PetscFree(lens);CHKERRQ(ierr);
1793   ierr = PetscFree(owner);CHKERRQ(ierr);
1794   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1795 
1796   /* actually zap the local rows */
1797   /*
1798         Zero the required rows. If the "diagonal block" of the matrix
1799      is square and the user wishes to set the diagonal we use separate
1800      code so that MatSetValues() is not called for each diagonal entry,
1801      which would allocate new memory and trigger many mallocs, slowing things down.
1802 
1803        Contributed by: Matthew Knepley
1804   */
1805   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1806   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1807   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1808     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1809   } else if (diag != 0.0) {
1810     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1811     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1812       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1813 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1814     }
1815     for (i=0; i<slen; i++) {
1816       row  = lrows[i] + rstart_bs;
1817       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1818     }
1819     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1820     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1821   } else {
1822     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1823   }
1824 
1825   ierr = PetscFree(lrows);CHKERRQ(ierr);
1826 
1827   /* wait on sends */
1828   if (nsends) {
1829     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1830     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1831     ierr = PetscFree(send_status);CHKERRQ(ierr);
1832   }
1833   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1834   ierr = PetscFree(svalues);CHKERRQ(ierr);
1835 
1836   PetscFunctionReturn(0);
1837 }
1838 
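/*
   Illustrative sketch only (assumed caller code, not part of this file): rows passed to
   MatZeroRows() are global row numbers and need not be owned by the calling process; the
   routine above routes them to their owners.  Here nrows and rows[] are placeholders and
   1.0 is placed on the diagonal of every zeroed row.

      ierr = MatZeroRows(A,nrows,rows,1.0);CHKERRQ(ierr);
*/
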
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1841 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1842 {
1843   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1844   MPI_Comm          comm = A->comm;
1845   static PetscTruth called = PETSC_FALSE;
1846   PetscErrorCode    ierr;
1847 
1848   PetscFunctionBegin;
1849   if (!a->rank) {
1850     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1851   }
1852   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1853   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1854   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1860 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1861 {
1862   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1863   PetscErrorCode ierr;
1864 
1865   PetscFunctionBegin;
1866   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1871 
1872 #undef __FUNCT__
1873 #define __FUNCT__ "MatEqual_MPIBAIJ"
1874 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1875 {
1876   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1877   Mat            a,b,c,d;
1878   PetscTruth     flg;
1879   PetscErrorCode ierr;
1880 
1881   PetscFunctionBegin;
1882   a = matA->A; b = matA->B;
1883   c = matB->A; d = matB->B;
1884 
1885   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1886   if (flg) {
1887     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1888   }
1889   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatCopy_MPIBAIJ"
1895 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1896 {
1897   PetscErrorCode ierr;
1898   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1899   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1900 
1901   PetscFunctionBegin;
1902   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1903   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1904     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1905   } else {
1906     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1907     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1908   }
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1914 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1915 {
1916   PetscErrorCode ierr;
1917 
1918   PetscFunctionBegin;
1919   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 #include "petscblaslapack.h"
1924 #undef __FUNCT__
1925 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1926 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1927 {
1928   PetscErrorCode ierr;
1929   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1930   PetscBLASInt   bnz,one=1;
1931   Mat_SeqBAIJ    *x,*y;
1932 
1933   PetscFunctionBegin;
1934   if (str == SAME_NONZERO_PATTERN) {
1935     PetscScalar alpha = a;
1936     x = (Mat_SeqBAIJ *)xx->A->data;
1937     y = (Mat_SeqBAIJ *)yy->A->data;
1938     bnz = (PetscBLASInt)x->nz;
1939     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1940     x = (Mat_SeqBAIJ *)xx->B->data;
1941     y = (Mat_SeqBAIJ *)yy->B->data;
1942     bnz = (PetscBLASInt)x->nz;
1943     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1944   } else {
1945     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1946   }
1947   PetscFunctionReturn(0);
1948 }
1949 
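/*
   Illustrative sketch only (assumed caller code, not part of this file): the fast BLAS path
   above is taken only when the caller promises matching nonzero patterns; otherwise the
   generic MatAXPY_Basic() path is used.  Y and X stand for two assembled MPIBAIJ matrices.

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);        Y <- Y + 2 X, fast path
      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   generic path
*/
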
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1952 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1953 {
1954   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1955   PetscErrorCode ierr;
1956 
1957   PetscFunctionBegin;
1958   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1959   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1965 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1966 {
1967   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1968   PetscErrorCode ierr;
1969 
1970   PetscFunctionBegin;
1971   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1972   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 /* -------------------------------------------------------------------*/
1977 static struct _MatOps MatOps_Values = {
1978        MatSetValues_MPIBAIJ,
1979        MatGetRow_MPIBAIJ,
1980        MatRestoreRow_MPIBAIJ,
1981        MatMult_MPIBAIJ,
1982 /* 4*/ MatMultAdd_MPIBAIJ,
1983        MatMultTranspose_MPIBAIJ,
1984        MatMultTransposeAdd_MPIBAIJ,
1985        0,
1986        0,
1987        0,
1988 /*10*/ 0,
1989        0,
1990        0,
1991        0,
1992        MatTranspose_MPIBAIJ,
1993 /*15*/ MatGetInfo_MPIBAIJ,
1994        MatEqual_MPIBAIJ,
1995        MatGetDiagonal_MPIBAIJ,
1996        MatDiagonalScale_MPIBAIJ,
1997        MatNorm_MPIBAIJ,
1998 /*20*/ MatAssemblyBegin_MPIBAIJ,
1999        MatAssemblyEnd_MPIBAIJ,
2000        0,
2001        MatSetOption_MPIBAIJ,
2002        MatZeroEntries_MPIBAIJ,
2003 /*25*/ MatZeroRows_MPIBAIJ,
2004        0,
2005        0,
2006        0,
2007        0,
2008 /*30*/ MatSetUpPreallocation_MPIBAIJ,
2009        0,
2010        0,
2011        0,
2012        0,
2013 /*35*/ MatDuplicate_MPIBAIJ,
2014        0,
2015        0,
2016        0,
2017        0,
2018 /*40*/ MatAXPY_MPIBAIJ,
2019        MatGetSubMatrices_MPIBAIJ,
2020        MatIncreaseOverlap_MPIBAIJ,
2021        MatGetValues_MPIBAIJ,
2022        MatCopy_MPIBAIJ,
2023 /*45*/ MatPrintHelp_MPIBAIJ,
2024        MatScale_MPIBAIJ,
2025        0,
2026        0,
2027        0,
2028 /*50*/ 0,
2029        0,
2030        0,
2031        0,
2032        0,
2033 /*55*/ 0,
2034        0,
2035        MatSetUnfactored_MPIBAIJ,
2036        0,
2037        MatSetValuesBlocked_MPIBAIJ,
2038 /*60*/ 0,
2039        MatDestroy_MPIBAIJ,
2040        MatView_MPIBAIJ,
2041        0,
2042        0,
2043 /*65*/ 0,
2044        0,
2045        0,
2046        0,
2047        0,
2048 /*70*/ MatGetRowMax_MPIBAIJ,
2049        0,
2050        0,
2051        0,
2052        0,
2053 /*75*/ 0,
2054        0,
2055        0,
2056        0,
2057        0,
2058 /*80*/ 0,
2059        0,
2060        0,
2061        0,
2062        MatLoad_MPIBAIJ,
2063 /*85*/ 0,
2064        0,
2065        0,
2066        0,
2067        0,
2068 /*90*/ 0,
2069        0,
2070        0,
2071        0,
2072        0,
2073 /*95*/ 0,
2074        0,
2075        0,
2076        0,
2077        0,
2078 /*100*/0,
2079        0,
2080        0,
2081        0,
2082        0,
2083 /*105*/0,
2084        MatRealPart_MPIBAIJ,
2085        MatImaginaryPart_MPIBAIJ};
2086 
2087 
2088 EXTERN_C_BEGIN
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2091 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2092 {
2093   PetscFunctionBegin;
2094   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2095   *iscopy = PETSC_FALSE;
2096   PetscFunctionReturn(0);
2097 }
2098 EXTERN_C_END
2099 
2100 EXTERN_C_BEGIN
2101 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2102 EXTERN_C_END
2103 
2104 #undef __FUNCT__
2105 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2106 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2107 {
2108   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
2109   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2110   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2111   const PetscInt *JJ;
2112   PetscScalar    *values;
2113   PetscErrorCode ierr;
2114 
2115   PetscFunctionBegin;
2116 #if defined(PETSC_OPT_g)
2117   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2118 #endif
2119   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2120   o_nnz = d_nnz + m;
2121 
2122   for (i=0; i<m; i++) {
2123     nnz     = I[i+1]- I[i];
2124     JJ      = J + I[i];
2125     nnz_max = PetscMax(nnz_max,nnz);
2126 #if defined(PETSC_OPT_g)
2127     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2128 #endif
2129     for (j=0; j<nnz; j++) {
2130       if (*JJ >= cstart) break;
2131       JJ++;
2132     }
2133     d = 0;
2134     for (; j<nnz; j++) {
2135       if (*JJ++ >= cend) break;
2136       d++;
2137     }
2138     d_nnz[i] = d;
2139     o_nnz[i] = nnz - d;
2140   }
2141   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2142   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2143 
2144   if (v) values = (PetscScalar*)v;
2145   else {
2146     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2147     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2148   }
2149 
2150   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2151   for (i=0; i<m; i++) {
2152     ii   = i + rstart;
2153     nnz  = I[i+1]- I[i];
2154     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2155   }
2156   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2157   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2159 
2160   if (!v) {
2161     ierr = PetscFree(values);CHKERRQ(ierr);
2162   }
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 #undef __FUNCT__
2167 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2168 /*@C
2169    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ
2170    (BAIJ) format, given the local block rows in compressed row (CSR) form.
2171 
2172    Collective on MPI_Comm
2173 
2174    Input Parameters:
2175 +  B - the matrix
.  bs - the block size
2176 .  i - the indices into j for the start of each local block row (starts with zero)
2177 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2178 -  v - optional values in the matrix
2179 
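   Sample usage (a sketch only; bs, m, n, M, N and the arrays ii, jj, vv are assumed to have
   been set up by the caller, with ii/jj describing the local block rows in block-CSR form):

.vb
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);CHKERRQ(ierr);
.ve
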
2180    Level: developer
2181 
2182 .keywords: matrix, aij, compressed row, sparse, parallel
2183 
2184 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2185 @*/
2186 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2187 {
2188   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2189 
2190   PetscFunctionBegin;
2191   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2192   if (f) {
2193     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2194   }
2195   PetscFunctionReturn(0);
2196 }
2197 
2198 EXTERN_C_BEGIN
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2201 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2202 {
2203   Mat_MPIBAIJ    *b;
2204   PetscErrorCode ierr;
2205   PetscInt       i;
2206 
2207   PetscFunctionBegin;
2208   B->preallocated = PETSC_TRUE;
2209   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2210 
2211   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2212   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2213   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2214   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2215   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2216 
2217   B->rmap.bs  = bs;
2218   B->cmap.bs  = bs;
2219   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2220   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2221 
2222   if (d_nnz) {
2223     for (i=0; i<B->rmap.n/bs; i++) {
2224       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2225     }
2226   }
2227   if (o_nnz) {
2228     for (i=0; i<B->rmap.n/bs; i++) {
2229       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2230     }
2231   }
2232 
2233   b = (Mat_MPIBAIJ*)B->data;
2234   b->bs2 = bs*bs;
2235   b->mbs = B->rmap.n/bs;
2236   b->nbs = B->cmap.n/bs;
2237   b->Mbs = B->rmap.N/bs;
2238   b->Nbs = B->cmap.N/bs;
2239 
2240   for (i=0; i<=b->size; i++) {
2241     b->rangebs[i] = B->rmap.range[i]/bs;
2242   }
2243   b->rstartbs = B->rmap.rstart/bs;
2244   b->rendbs   = B->rmap.rend/bs;
2245   b->cstartbs = B->cmap.rstart/bs;
2246   b->cendbs   = B->cmap.rend/bs;
2247 
2248   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2249   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2250   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2251   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2252   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2253   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2254   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2255   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2256   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2257   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2258 
2259   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2260 
2261   PetscFunctionReturn(0);
2262 }
2263 EXTERN_C_END
2264 
2265 EXTERN_C_BEGIN
2266 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2267 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2268 EXTERN_C_END
2269 
2270 /*MC
2271    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2272 
2273    Options Database Keys:
2274 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2275 
2276   Level: beginner
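   Usage sketch (illustrative only): either set the type directly,
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
.ve
   or call MatSetFromOptions() and select it with -mat_type mpibaij.
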
2277 
2278 .seealso: MatCreateMPIBAIJ()
2279 M*/
2280 
2281 EXTERN_C_BEGIN
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatCreate_MPIBAIJ"
2284 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2285 {
2286   Mat_MPIBAIJ    *b;
2287   PetscErrorCode ierr;
2288   PetscTruth     flg;
2289 
2290   PetscFunctionBegin;
2291   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2292   B->data = (void*)b;
2293 
2294 
2295   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2296   B->mapping    = 0;
2297   B->factor     = 0;
2298   B->assembled  = PETSC_FALSE;
2299 
2300   B->insertmode = NOT_SET_VALUES;
2301   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2302   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2303 
2304   /* build local table of row and column ownerships */
2305   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2306 
2307   /* build cache for off array entries formed */
2308   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2309   b->donotstash  = PETSC_FALSE;
2310   b->colmap      = PETSC_NULL;
2311   b->garray      = PETSC_NULL;
2312   b->roworiented = PETSC_TRUE;
2313 
2314 #if defined(PETSC_USE_MAT_SINGLE)
2315   /* stuff for MatSetValues_XXX in single precision */
2316   b->setvalueslen     = 0;
2317   b->setvaluescopy    = PETSC_NULL;
2318 #endif
2319 
2320   /* stuff used in block assembly */
2321   b->barray       = 0;
2322 
2323   /* stuff used for matrix vector multiply */
2324   b->lvec         = 0;
2325   b->Mvctx        = 0;
2326 
2327   /* stuff for MatGetRow() */
2328   b->rowindices   = 0;
2329   b->rowvalues    = 0;
2330   b->getrowactive = PETSC_FALSE;
2331 
2332   /* hash table stuff */
2333   b->ht           = 0;
2334   b->hd           = 0;
2335   b->ht_size      = 0;
2336   b->ht_flag      = PETSC_FALSE;
2337   b->ht_fact      = 0;
2338   b->ht_total_ct  = 0;
2339   b->ht_insert_ct = 0;
2340 
2341   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2342   if (flg) {
2343     PetscReal fact = 1.39;
2344     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2345     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2346     if (fact <= 1.0) fact = 1.39;
2347     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2348     ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2349   }
2350   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2351                                      "MatStoreValues_MPIBAIJ",
2352                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2353   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2354                                      "MatRetrieveValues_MPIBAIJ",
2355                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2356   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2357                                      "MatGetDiagonalBlock_MPIBAIJ",
2358                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2359   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2360                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2361                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2362   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2363 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2364 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2365   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2366                                      "MatDiagonalScaleLocal_MPIBAIJ",
2367                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2368   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2369                                      "MatSetHashTableFactor_MPIBAIJ",
2370                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2371   PetscFunctionReturn(0);
2372 }
2373 EXTERN_C_END
2374 
2375 /*MC
2376    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2377 
2378    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2379    and MATMPIBAIJ otherwise.
2380 
2381    Options Database Keys:
2382 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2383 
2384   Level: beginner
2385 
2386 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2387 M*/
2388 
2389 EXTERN_C_BEGIN
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatCreate_BAIJ"
2392 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2393 {
2394   PetscErrorCode ierr;
2395   PetscMPIInt    size;
2396 
2397   PetscFunctionBegin;
2398   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2399   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2400   if (size == 1) {
2401     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2402   } else {
2403     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2404   }
2405   PetscFunctionReturn(0);
2406 }
2407 EXTERN_C_END
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2411 /*@C
2412    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2413    (block compressed row).  For good matrix assembly performance
2414    the user should preallocate the matrix storage by setting the parameters
2415    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2416    performance can be increased by more than a factor of 50.
2417 
2418    Collective on Mat
2419 
2420    Input Parameters:
2421 +  A - the matrix
2422 .  bs   - size of block
2423 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2424            submatrix  (same for all local rows)
2425 .  d_nnz - array containing the number of block nonzeros in the various block rows
2426            in the diagonal portion of the local submatrix (possibly different for each block
2427            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2428 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2429            submatrix (same for all local rows).
2430 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2431            off-diagonal portion of the local submatrix (possibly different for
2432            each block row) or PETSC_NULL.
2433 
2434    If the *_nnz parameter is given then the *_nz parameter is ignored
2435 
2436    Options Database Keys:
2437 .   -mat_no_unroll - uses code that does not unroll the loops in the
2438                      block calculations (much slower)
2439 .   -mat_block_size - size of the blocks to use
2440 
2441    Notes:
2442    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2443    then it must be used on all processors that share the object for that argument.
2444 
2445    Storage Information:
2446    For a square global matrix we define each processor's diagonal portion
2447    to be its local rows and the corresponding columns (a square submatrix);
2448    each processor's off-diagonal portion encompasses the remainder of the
2449    local matrix (a rectangular submatrix).
2450 
2451    The user can specify preallocated storage for the diagonal part of
2452    the local submatrix with either d_nz or d_nnz (not both).  Set
2453    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2454    memory allocation.  Likewise, specify preallocated storage for the
2455    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2456 
2457    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2458    the figure below we depict these three local rows and all columns (0-11).
2459 
2460 .vb
2461            0 1 2 3 4 5 6 7 8 9 10 11
2462           -------------------
2463    row 3  |  o o o d d d o o o o o o
2464    row 4  |  o o o d d d o o o o o o
2465    row 5  |  o o o d d d o o o o o o
2466           -------------------
2467 .ve
2468 
2469    Thus, any entries in the d locations are stored in the d (diagonal)
2470    submatrix, and any entries in the o locations are stored in the
2471    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2472    stored simply in the MATSEQBAIJ format for compressed row storage.
2473 
2474    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2475    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2476    In general, for PDE problems in which most nonzeros are near the diagonal,
2477    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2478    or you will get TERRIBLE performance; see the users' manual chapter on
2479    matrices.
2480 
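   Sample usage (a sketch only; the block size 2 and the counts 3 and 1 are illustrative):

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve
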
2481    Level: intermediate
2482 
2483 .keywords: matrix, block, aij, compressed row, sparse, parallel
2484 
2485 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2486 @*/
2487 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2488 {
2489   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2490 
2491   PetscFunctionBegin;
2492   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2493   if (f) {
2494     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2495   }
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 #undef __FUNCT__
2500 #define __FUNCT__ "MatCreateMPIBAIJ"
2501 /*@C
2502    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2503    (block compressed row).  For good matrix assembly performance
2504    the user should preallocate the matrix storage by setting the parameters
2505    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2506    performance can be increased by more than a factor of 50.
2507 
2508    Collective on MPI_Comm
2509 
2510    Input Parameters:
2511 +  comm - MPI communicator
2512 .  bs   - size of block
2513 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2514            This value should be the same as the local size used in creating the
2515            y vector for the matrix-vector product y = Ax.
2516 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2517            This value should be the same as the local size used in creating the
2518            x vector for the matrix-vector product y = Ax.
2519 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2520 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2521 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2522            submatrix  (same for all local rows)
2523 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2524            in the diagonal portion of the local submatrix (possibly different for each block
2525            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2526 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2527            submatrix (same for all local rows).
2528 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2529            off-diagonal portion of the local submatrix (possibly different for
2530            each block row) or PETSC_NULL.
2531 
2532    Output Parameter:
2533 .  A - the matrix
2534 
2535    Options Database Keys:
2536 .   -mat_no_unroll - uses code that does not unroll the loops in the
2537                      block calculations (much slower)
2538 .   -mat_block_size - size of the blocks to use
2539 
2540    Notes:
2541    If the *_nnz parameter is given then the *_nz parameter is ignored
2542 
2543    A nonzero block is any block that has 1 or more nonzeros in it
2544 
2545    The user MUST specify either the local or global matrix dimensions
2546    (possibly both).
2547 
2548    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2549    then it must be used on all processors that share the object for that argument.
2550 
2551    Storage Information:
2552    For a square global matrix we define each processor's diagonal portion
2553    to be its local rows and the corresponding columns (a square submatrix);
2554    each processor's off-diagonal portion encompasses the remainder of the
2555    local matrix (a rectangular submatrix).
2556 
2557    The user can specify preallocated storage for the diagonal part of
2558    the local submatrix with either d_nz or d_nnz (not both).  Set
2559    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2560    memory allocation.  Likewise, specify preallocated storage for the
2561    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2562 
2563    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2564    the figure below we depict these three local rows and all columns (0-11).
2565 
2566 .vb
2567            0 1 2 3 4 5 6 7 8 9 10 11
2568           -------------------
2569    row 3  |  o o o d d d o o o o o o
2570    row 4  |  o o o d d d o o o o o o
2571    row 5  |  o o o d d d o o o o o o
2572           -------------------
2573 .ve
2574 
2575    Thus, any entries in the d locations are stored in the d (diagonal)
2576    submatrix, and any entries in the o locations are stored in the
2577    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2578    stored simply in the MATSEQBAIJ format for compressed row storage.
2579 
2580    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2581    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2582    In general, for PDE problems in which most nonzeros are near the diagonal,
2583    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2584    or you will get TERRIBLE performance; see the users' manual chapter on
2585    matrices.
2586 
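   Sample usage (a sketch only; block size 3, with 5 diagonal and 2 off-diagonal block
   nonzeros per block row assumed for every block row):

.vb
   ierr = MatCreateMPIBAIJ(comm,3,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
   ... insert entries with MatSetValuesBlocked() ...
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
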
2587    Level: intermediate
2588 
2589 .keywords: matrix, block, aij, compressed row, sparse, parallel
2590 
2591 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2592 @*/
2593 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2594 {
2595   PetscErrorCode ierr;
2596   PetscMPIInt    size;
2597 
2598   PetscFunctionBegin;
2599   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2600   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2601   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2602   if (size > 1) {
2603     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2604     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2605   } else {
2606     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2607     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2608   }
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 #undef __FUNCT__
2613 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2614 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2615 {
2616   Mat            mat;
2617   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2618   PetscErrorCode ierr;
2619   PetscInt       len=0;
2620 
2621   PetscFunctionBegin;
2622   *newmat       = 0;
2623   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2624   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2625   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2626   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2627 
2628   mat->factor       = matin->factor;
2629   mat->preallocated = PETSC_TRUE;
2630   mat->assembled    = PETSC_TRUE;
2631   mat->insertmode   = NOT_SET_VALUES;
2632 
2633   a      = (Mat_MPIBAIJ*)mat->data;
2634   mat->rmap.bs  = matin->rmap.bs;
2635   a->bs2   = oldmat->bs2;
2636   a->mbs   = oldmat->mbs;
2637   a->nbs   = oldmat->nbs;
2638   a->Mbs   = oldmat->Mbs;
2639   a->Nbs   = oldmat->Nbs;
2640 
2641   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2642   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2643 
2644   a->size         = oldmat->size;
2645   a->rank         = oldmat->rank;
2646   a->donotstash   = oldmat->donotstash;
2647   a->roworiented  = oldmat->roworiented;
2648   a->rowindices   = 0;
2649   a->rowvalues    = 0;
2650   a->getrowactive = PETSC_FALSE;
2651   a->barray       = 0;
2652   a->rstartbs     = oldmat->rstartbs;
2653   a->rendbs       = oldmat->rendbs;
2654   a->cstartbs     = oldmat->cstartbs;
2655   a->cendbs       = oldmat->cendbs;
2656 
2657   /* hash table stuff */
2658   a->ht           = 0;
2659   a->hd           = 0;
2660   a->ht_size      = 0;
2661   a->ht_flag      = oldmat->ht_flag;
2662   a->ht_fact      = oldmat->ht_fact;
2663   a->ht_total_ct  = 0;
2664   a->ht_insert_ct = 0;
2665 
2666   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2667   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2668   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2669   if (oldmat->colmap) {
2670 #if defined (PETSC_USE_CTABLE)
2671   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2672 #else
2673   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2674   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2675   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2676 #endif
2677   } else a->colmap = 0;
2678 
2679   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2680     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2681     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2682     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2683   } else a->garray = 0;
2684 
2685   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2686   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2687   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2688   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2689 
2690   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2691   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2692   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2693   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2694   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2695   *newmat = mat;
2696 
2697   PetscFunctionReturn(0);
2698 }
2699 
2700 #include "petscsys.h"
2701 
2702 #undef __FUNCT__
2703 #define __FUNCT__ "MatLoad_MPIBAIJ"
2704 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2705 {
2706   Mat            A;
2707   PetscErrorCode ierr;
2708   int            fd;
2709   PetscInt       i,nz,j,rstart,rend;
2710   PetscScalar    *vals,*buf;
2711   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2712   MPI_Status     status;
2713   PetscMPIInt    rank,size,maxnz;
2714   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2715   PetscInt       *locrowlens,*procsnz = 0,*browners;
2716   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2717   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2718   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2719   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2720 
2721   PetscFunctionBegin;
2722   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2723 
2724   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2725   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2726   if (!rank) {
2727     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2728     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2729     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2730   }
2731 
2732   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2733   M = header[1]; N = header[2];
2734 
2735   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2736 
2737   /*
2738      This code adds extra rows to make sure the number of rows is
2739      divisible by the blocksize
2740   */
2741   Mbs        = M/bs;
2742   extra_rows = bs - M + bs*Mbs;
2743   if (extra_rows == bs) extra_rows = 0;
2744   else                  Mbs++;
2745   if (extra_rows && !rank) {
2746     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2747   }
2748 
2749   /* determine ownership of all rows */
2750   mbs        = Mbs/size + ((Mbs % size) > rank);
2751   m          = mbs*bs;
2752   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2753   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2754 
2755   /* process 0 needs enough room for process with most rows */
2756   if (!rank) {
2757     mmax = rowners[1];
2758     for (i=2; i<size; i++) {
2759       mmax = PetscMax(mmax,rowners[i]);
2760     }
2761     mmax*=bs;
2762   } else mmax = m;
2763 
2764   rowners[0] = 0;
2765   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2766   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2767   rstart = rowners[rank];
2768   rend   = rowners[rank+1];
2769 
2770   /* distribute row lengths to all processors */
2771   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2772   if (!rank) {
2773     mend = m;
2774     if (size == 1) mend = mend - extra_rows;
2775     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2776     for (j=mend; j<m; j++) locrowlens[j] = 1;
2777     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2778     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2779     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2780     for (j=0; j<m; j++) {
2781       procsnz[0] += locrowlens[j];
2782     }
2783     for (i=1; i<size; i++) {
2784       mend = browners[i+1] - browners[i];
2785       if (i == size-1) mend = mend - extra_rows;
2786       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2787       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2788       /* calculate the number of nonzeros on each processor */
2789       for (j=0; j<browners[i+1]-browners[i]; j++) {
2790         procsnz[i] += rowlengths[j];
2791       }
2792       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2793     }
2794     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2795   } else {
2796     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2797   }
2798 
2799   if (!rank) {
2800     /* determine max buffer needed and allocate it */
2801     maxnz = procsnz[0];
2802     for (i=1; i<size; i++) {
2803       maxnz = PetscMax(maxnz,procsnz[i]);
2804     }
2805     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2806 
2807     /* read in my part of the matrix column indices  */
2808     nz     = procsnz[0];
2809     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2810     mycols = ibuf;
2811     if (size == 1)  nz -= extra_rows;
2812     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2813     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2814 
2815     /* read in everyone else's (except the last process's) and ship them off */
2816     for (i=1; i<size-1; i++) {
2817       nz   = procsnz[i];
2818       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2819       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2820     }
2821     /* read in the column indices for the last process */
2822     if (size != 1) {
2823       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2824       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2825       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2826       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2827     }
2828     ierr = PetscFree(cols);CHKERRQ(ierr);
2829   } else {
2830     /* determine buffer space needed for message */
2831     nz = 0;
2832     for (i=0; i<m; i++) {
2833       nz += locrowlens[i];
2834     }
2835     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2836     mycols = ibuf;
2837     /* receive the message of column indices */
2838     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2839     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2840     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"number of column indices received does not match the sum of the row lengths");
2841   }
2842 
2843   /* loop over local block rows, determining the number of diagonal and off-diagonal block entries in each */
2844   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2845   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2846   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2847   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2848   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
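       /*
          mask[] flags the block columns already seen in the current block row; masked1[] collects
          block columns that fall inside the diagonal block (rstart <= col < rend) and masked2[]
          those that fall outside it, which yields the dlens/odlens preallocation counts below.
       */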
2849   rowcount = 0; nzcount = 0;
2850   for (i=0; i<mbs; i++) {
2851     dcount  = 0;
2852     odcount = 0;
2853     for (j=0; j<bs; j++) {
2854       kmax = locrowlens[rowcount];
2855       for (k=0; k<kmax; k++) {
2856         tmp = mycols[nzcount++]/bs;
2857         if (!mask[tmp]) {
2858           mask[tmp] = 1;
2859           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2860           else masked1[dcount++] = tmp;
2861         }
2862       }
2863       rowcount++;
2864     }
2865 
2866     dlens[i]  = dcount;
2867     odlens[i] = odcount;
2868 
2869     /* zero out the mask elements we set */
2870     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2871     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2872   }
2873 
2874   /* create our matrix */
2875   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2876   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2877   ierr = MatSetType(A,type);CHKERRQ(ierr);
2878   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2879 
2880   /* indicate that the column indices passed to MatSetValues() below are sorted within each row */
2881   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2882 
2883   if (!rank) {
2884     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2885     /* read in my part of the matrix numerical values  */
2886     nz = procsnz[0];
2887     vals = buf;
2888     mycols = ibuf;
2889     if (size == 1)  nz -= extra_rows;
2890     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2891     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2892 
2893     /* insert into matrix */
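         /* rows are inserted one at a time using global point (unblocked) row indices;
            jj starts at the first locally owned point row, rstart*bs */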
2894     jj      = rstart*bs;
2895     for (i=0; i<m; i++) {
2896       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2897       mycols += locrowlens[i];
2898       vals   += locrowlens[i];
2899       jj++;
2900     }
2901     /* read in the values for the other processes (except the last one) and ship them out */
2902     for (i=1; i<size-1; i++) {
2903       nz   = procsnz[i];
2904       vals = buf;
2905       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2906       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2907     }
2908     /* the last proc */
2909     if (size != 1){
2910       nz   = procsnz[i] - extra_rows;
2911       vals = buf;
2912       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2913       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2914       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2915     }
2916     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2917   } else {
2918     /* receive numeric values */
2919     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2920 
2921     /* receive the message of values */
2922     vals   = buf;
2923     mycols = ibuf;
2924     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2925     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2926     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"number of values received does not match the sum of the row lengths");
2927 
2928     /* insert into matrix */
2929     jj      = rstart*bs;
2930     for (i=0; i<m; i++) {
2931       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2932       mycols += locrowlens[i];
2933       vals   += locrowlens[i];
2934       jj++;
2935     }
2936   }
2937   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2938   ierr = PetscFree(buf);CHKERRQ(ierr);
2939   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2940   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2941   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2942   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2943   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2944   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2945 
2946   *newmat = A;
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 #undef __FUNCT__
2951 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2952 /*@
2953    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
2954 
2955    Collective on Mat
2956 
2957    Input Parameters:
2958 +  mat  - the matrix
2959 -  fact - factor used to compute the hash table size
2960 
2961    Level: advanced
2962 
2963    Notes:
2964    This can also be set by the command line option: -mat_use_hash_table <fact>
2965 
2966 .keywords: matrix, hashtable, factor, HT
2967 
2968 .seealso: MatSetOption()
2969 @*/
2970 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2971 {
2972   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2973 
2974   PetscFunctionBegin;
2975   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2976   if (f) {
2977     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2978   }
2979   PetscFunctionReturn(0);
2980 }
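
/*
   Illustrative usage sketch (not part of the original source; variable declarations and error
   checking are abbreviated, and the value 1.99 is only an example):

      ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
      ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetHashTableFactor(mat,1.99);CHKERRQ(ierr);

   The factor is stored by MatSetHashTableFactor_MPIBAIJ() below; for matrix types that do not
   register "MatSetHashTableFactor_C" the call is simply a no-op.
*/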
2981 
2982 EXTERN_C_BEGIN
2983 #undef __FUNCT__
2984 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2985 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2986 {
2987   Mat_MPIBAIJ *baij;
2988 
2989   PetscFunctionBegin;
2990   baij = (Mat_MPIBAIJ*)mat->data;
2991   baij->ht_fact = fact;
2992   PetscFunctionReturn(0);
2993 }
2994 EXTERN_C_END
2995 
2996 #undef __FUNCT__
2997 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2998 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2999 {
3000   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3001   PetscFunctionBegin;
3002   *Ad     = a->A;
3003   *Ao     = a->B;
3004   *colmap = a->garray;
3005   PetscFunctionReturn(0);
3006 }
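
/*
   Illustrative usage sketch (the names Ad, Ao and garray below are only examples): retrieve the
   local diagonal and off-diagonal SeqBAIJ parts of an MPIBAIJ matrix A.

      Mat      Ad,Ao;
      PetscInt *garray;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Ad holds the block-diagonal part, Ao the off-processor part, and garray maps the local block
   columns of Ao to global block column indices. The returned matrices are the internal parts of
   A, not copies, so they should not be destroyed separately.
*/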
3007