xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision ce0a2cd1da0658c2b28aad1be2e2c8e41567bece)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
15 
16 #undef __FUNCT__
17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
19 {
20   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       i,*idxb = 0;
23   PetscScalar    *va,*vb;
24   Vec            vtmp;
25 
26   PetscFunctionBegin;
27 
28   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
29   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
30   if (idx) {
31     for (i=0; i<A->rmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap.n; i++){
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);}
41   }
42 
43   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
44   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
45   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
46   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 EXTERN_C_BEGIN
51 #undef __FUNCT__
52 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
56   PetscErrorCode ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
60   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 /*
81      Local utility routine that creates a mapping from the global column
82    number to the local number in the off-diagonal part of the local
83    storage of the matrix.  This is done in a non-scalable way since the
84    length of colmap equals the global matrix length.
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89 {
90   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
91   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
92   PetscErrorCode ierr;
93   PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;
94 
95   PetscFunctionBegin;
96 #if defined (PETSC_USE_CTABLE)
97   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
98   for (i=0; i<nbs; i++){
99     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
100   }
101 #else
102   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
103   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
105   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106 #endif
107   PetscFunctionReturn(0);
108 }
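/*
   Illustration (not part of the original source): with bs = 2 and
   garray = {3,7}, the off-diagonal block B has local block columns 0 and 1
   standing for global block columns 3 and 7, so the loop above sets
   colmap[3] = 0*2+1 = 1 and colmap[7] = 1*2+1 = 3, leaving every other
   entry 0.  Lookups therefore subtract 1 and treat a negative result as
   "column not present in B".
*/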
109 
110 #define CHUNKSIZE  10
111 
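/*
   The two macros below insert a single scalar entry, with (row,col) given as
   point indices local to this process, directly into the SeqBAIJ storage of
   the diagonal block A or the off-diagonal block B: locate the block row,
   search its sorted block-column list (a binary search narrowed to a short
   linear scan), then either update the target scalar inside its bs*bs block
   or shift the tail of the row to open a slot, reallocating through
   MatSeqXAIJReallocateAIJ() when the row is full.  The nonew flag suppresses
   (1) or forbids (-1) the creation of new nonzero blocks.
*/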
112 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
113 { \
114  \
115     brow = row/bs;  \
116     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
117     rmax = aimax[brow]; nrow = ailen[brow]; \
118       bcol = col/bs; \
119       ridx = row % bs; cidx = col % bs; \
120       low = 0; high = nrow; \
121       while (high-low > 3) { \
122         t = (low+high)/2; \
123         if (rp[t] > bcol) high = t; \
124         else              low  = t; \
125       } \
126       for (_i=low; _i<high; _i++) { \
127         if (rp[_i] > bcol) break; \
128         if (rp[_i] == bcol) { \
129           bap  = ap +  bs2*_i + bs*cidx + ridx; \
130           if (addv == ADD_VALUES) *bap += value;  \
131           else                    *bap  = value;  \
132           goto a_noinsert; \
133         } \
134       } \
135       if (a->nonew == 1) goto a_noinsert; \
136       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
137       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
138       N = nrow++ - 1;  \
139       /* shift up all the later entries in this row */ \
140       for (ii=N; ii>=_i; ii--) { \
141         rp[ii+1] = rp[ii]; \
142         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
143       } \
144       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
145       rp[_i]                      = bcol;  \
146       ap[bs2*_i + bs*cidx + ridx] = value;  \
147       a_noinsert:; \
148     ailen[brow] = nrow; \
149 }
150 
151 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
152 { \
153     brow = row/bs;  \
154     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
155     rmax = bimax[brow]; nrow = bilen[brow]; \
156       bcol = col/bs; \
157       ridx = row % bs; cidx = col % bs; \
158       low = 0; high = nrow; \
159       while (high-low > 3) { \
160         t = (low+high)/2; \
161         if (rp[t] > bcol) high = t; \
162         else              low  = t; \
163       } \
164       for (_i=low; _i<high; _i++) { \
165         if (rp[_i] > bcol) break; \
166         if (rp[_i] == bcol) { \
167           bap  = ap +  bs2*_i + bs*cidx + ridx; \
168           if (addv == ADD_VALUES) *bap += value;  \
169           else                    *bap  = value;  \
170           goto b_noinsert; \
171         } \
172       } \
173       if (b->nonew == 1) goto b_noinsert; \
174       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
175       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
176       CHKMEMQ;\
177       N = nrow++ - 1;  \
178       /* shift up all the later entries in this row */ \
179       for (ii=N; ii>=_i; ii--) { \
180         rp[ii+1] = rp[ii]; \
181         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
182       } \
183       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
184       rp[_i]                      = bcol;  \
185       ap[bs2*_i + bs*cidx + ridx] = value;  \
186       b_noinsert:; \
187     bilen[brow] = nrow; \
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatSetValues_MPIBAIJ"
192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
193 {
194   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
195   MatScalar      value;
196   PetscTruth     roworiented = baij->roworiented;
197   PetscErrorCode ierr;
198   PetscInt       i,j,row,col;
199   PetscInt       rstart_orig=mat->rmap.rstart;
200   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
201   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
202 
203   /* Some Variables required in the macro */
204   Mat            A = baij->A;
205   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
206   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
207   MatScalar      *aa=a->a;
208 
209   Mat            B = baij->B;
210   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
211   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
212   MatScalar      *ba=b->a;
213 
214   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
215   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
216   MatScalar      *ap,*bap;
217 
218   PetscFunctionBegin;
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_DEBUG)
222     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
223 #endif
224     if (im[i] >= rstart_orig && im[i] < rend_orig) {
225       row = im[i] - rstart_orig;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart_orig && in[j] < cend_orig){
228           col = in[j] - cstart_orig;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_DEBUG)
234         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!baij->colmap) {
239               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
243             col  = col - 1;
244 #else
245             col = baij->colmap[in[j]/bs] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
248               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
251               B = baij->B;
252               b = (Mat_SeqBAIJ*)(B)->data;
253               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
254               ba=b->a;
255             } else col += in[j]%bs;
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
259           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
260         }
261       }
262     } else {
263       if (!baij->donotstash) {
264         if (roworiented) {
265           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
266         } else {
267           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
268         }
269       }
270     }
271   }
272   PetscFunctionReturn(0);
273 }
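/*
   Usage sketch (illustration only, not part of the original source): point
   values are normally set through the public MatSetValues() interface; rows
   owned by another process are routed through mat->stash above and delivered
   during assembly.

     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};   (row-oriented by default)
     ierr = MatSetValues(A,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/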
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
278 {
279   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
280   const PetscScalar *value;
281   MatScalar         *barray=baij->barray;
282   PetscTruth        roworiented = baij->roworiented;
283   PetscErrorCode    ierr;
284   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
285   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
286   PetscInt          cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
287 
288   PetscFunctionBegin;
289   if (!barray) {
290     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
291     baij->barray = barray;
292   }
293 
294   if (roworiented) {
295     stepval = (n-1)*bs;
296   } else {
297     stepval = (m-1)*bs;
298   }
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If n = 1 (row-oriented) or m = 1 (column-oriented) the block is already contiguous, so a copy is not required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + i*(stepval+bs)*bs + j*bs;
315           } else {
316             value = v + j*(stepval+bs)*bs + i*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=stepval) {
319             for (jj=0; jj<bs; jj++) {
320               *barray++  = *value++;
321             }
322           }
323           barray -=bs2;
324         }
325 
326         if (in[j] >= cstart && in[j] < cend){
327           col  = in[j] - cstart;
328           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
329         }
330         else if (in[j] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
333 #endif
334         else {
335           if (mat->was_assembled) {
336             if (!baij->colmap) {
337               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
338             }
339 
340 #if defined(PETSC_USE_DEBUG)
341 #if defined (PETSC_USE_CTABLE)
342             { PetscInt data;
343               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
344               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
345             }
346 #else
347             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
348 #endif
349 #endif
350 #if defined (PETSC_USE_CTABLE)
351             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
352             col  = (col - 1)/bs;
353 #else
354             col = (baij->colmap[in[j]] - 1)/bs;
355 #endif
356             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
357               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
358               col =  in[j];
359             }
360           }
361           else col = in[j];
362           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363         }
364       }
365     } else {
366       if (!baij->donotstash) {
367         if (roworiented) {
368           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         } else {
370           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         }
372       }
373     }
374   }
375   PetscFunctionReturn(0);
376 }
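/*
   Usage sketch (illustration only): MatSetValuesBlocked() takes block
   indices, so with bs = 2 the single block (1,2) below covers point rows
   2..3 and point columns 4..5 of the assembled matrix.

     PetscInt    brow = 1,bcol = 2;
     PetscScalar block[4] = {1.0,2.0,3.0,4.0};   (one bs*bs block)
     ierr = MatSetValuesBlocked(A,1,&brow,1,&bcol,block,ADD_VALUES);CHKERRQ(ierr);
*/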
377 
378 #define HASH_KEY 0.6180339887
379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
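/*
   Worked example (illustration only): HASH() is multiplicative hashing with
   the golden-ratio conjugate, taking the fractional part of key*0.6180339887
   and scaling it to [0,size).  For size = 11 and key = 7:
       tmp = 7*0.6180339887 = 4.3262...,  fractional part = 0.3262...
       h1  = (PetscInt)(11*0.3262...) = 3
   Keys that collide are resolved by the linear probes in the lookup loops
   below.
*/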
382 #undef __FUNCT__
383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
385 {
386   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
387   PetscTruth     roworiented = baij->roworiented;
388   PetscErrorCode ierr;
389   PetscInt       i,j,row,col;
390   PetscInt       rstart_orig=mat->rmap.rstart;
391   PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
392   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
393   PetscReal      tmp;
394   MatScalar      **HD = baij->hd,value;
395 #if defined(PETSC_USE_DEBUG)
396   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
397 #endif
398 
399   PetscFunctionBegin;
400 
401   for (i=0; i<m; i++) {
402 #if defined(PETSC_USE_DEBUG)
403     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
404     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
405 #endif
406     row = im[i];
407     if (row >= rstart_orig && row < rend_orig) {
408       for (j=0; j<n; j++) {
409         col = in[j];
410         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411         /* Look up into the Hash Table */
412         key = (row/bs)*Nbs+(col/bs)+1;
413         h1  = HASH(size,key,tmp);
414 
415 
416         idx = h1;
417 #if defined(PETSC_USE_DEBUG)
418         insert_ct++;
419         total_ct++;
420         if (HT[idx] != key) {
421           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
422           if (idx == size) {
423             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
424             if (idx == h1) {
425               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
426             }
427           }
428         }
429 #else
430         if (HT[idx] != key) {
431           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
432           if (idx == size) {
433             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
434             if (idx == h1) {
435               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436             }
437           }
438         }
439 #endif
440         /* A HASH table entry is found, so insert the values at the correct address */
441         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
442         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
443       }
444     } else {
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454 #if defined(PETSC_USE_DEBUG)
455   baij->ht_total_ct = total_ct;
456   baij->ht_insert_ct = insert_ct;
457 #endif
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
464 {
465   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
466   PetscTruth        roworiented = baij->roworiented;
467   PetscErrorCode    ierr;
468   PetscInt          i,j,ii,jj,row,col;
469   PetscInt          rstart=baij->rstartbs;
470   PetscInt          rend=baij->rendbs,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2;
471   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
472   PetscReal         tmp;
473   MatScalar         **HD = baij->hd,*baij_a;
474   const PetscScalar *v_t,*value;
475 #if defined(PETSC_USE_DEBUG)
476   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
477 #endif
478 
479   PetscFunctionBegin;
480 
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487 #if defined(PETSC_USE_DEBUG)
488     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     row   = im[i];
492     v_t   = v + i*nbs2;
493     if (row >= rstart && row < rend) {
494       for (j=0; j<n; j++) {
495         col = in[j];
496 
497         /* Look up into the Hash Table */
498         key = row*Nbs+col+1;
499         h1  = HASH(size,key,tmp);
500 
501         idx = h1;
502 #if defined(PETSC_USE_DEBUG)
503         total_ct++;
504         insert_ct++;
505         if (HT[idx] != key) {
506           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
507           if (idx == size) {
508             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
509             if (idx == h1) {
510               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
511             }
512           }
513         }
514 #else
515         if (HT[idx] != key) {
516           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
517           if (idx == size) {
518             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
519             if (idx == h1) {
520               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
521             }
522           }
523         }
524 #endif
525         baij_a = HD[idx];
526         if (roworiented) {
527           /*value = v + i*(stepval+bs)*bs + j*bs;*/
528           /* value = v + (i*(stepval+bs)+j)*bs; */
529           value = v_t;
530           v_t  += bs;
531           if (addv == ADD_VALUES) {
532             for (ii=0; ii<bs; ii++,value+=stepval) {
533               for (jj=ii; jj<bs2; jj+=bs) {
534                 baij_a[jj]  += *value++;
535               }
536             }
537           } else {
538             for (ii=0; ii<bs; ii++,value+=stepval) {
539               for (jj=ii; jj<bs2; jj+=bs) {
540                 baij_a[jj]  = *value++;
541               }
542             }
543           }
544         } else {
545           value = v + j*(stepval+bs)*bs + i*bs;
546           if (addv == ADD_VALUES) {
547             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
548               for (jj=0; jj<bs; jj++) {
549                 baij_a[jj]  += *value++;
550               }
551             }
552           } else {
553             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
554               for (jj=0; jj<bs; jj++) {
555                 baij_a[jj]  = *value++;
556               }
557             }
558           }
559         }
560       }
561     } else {
562       if (!baij->donotstash) {
563         if (roworiented) {
564           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
565         } else {
566           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         }
568       }
569     }
570   }
571 #if defined(PETSC_USE_DEBUG)
572   baij->ht_total_ct = total_ct;
573   baij->ht_insert_ct = insert_ct;
574 #endif
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatGetValues_MPIBAIJ"
580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
581 {
582   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
583   PetscErrorCode ierr;
584   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
585   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
586 
587   PetscFunctionBegin;
588   for (i=0; i<m; i++) {
589     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
590     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
591     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
592       row = idxm[i] - bsrstart;
593       for (j=0; j<n; j++) {
594         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
595         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
596         if (idxn[j] >= bscstart && idxn[j] < bscend){
597           col = idxn[j] - bscstart;
598           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599         } else {
600           if (!baij->colmap) {
601             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
602           }
603 #if defined (PETSC_USE_CTABLE)
604           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
605           data--;
606 #else
607           data = baij->colmap[idxn[j]/bs]-1;
608 #endif
609           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
610           else {
611             col  = data + idxn[j]%bs;
612             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
613           }
614         }
615       }
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatNorm_MPIBAIJ"
625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
626 {
627   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
628   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
629   PetscErrorCode ierr;
630   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
631   PetscReal      sum = 0.0;
632   MatScalar      *v;
633 
634   PetscFunctionBegin;
635   if (baij->size == 1) {
636     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
637   } else {
638     if (type == NORM_FROBENIUS) {
639       v = amat->a;
640       nz = amat->nz*bs2;
641       for (i=0; i<nz; i++) {
642 #if defined(PETSC_USE_COMPLEX)
643         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
644 #else
645         sum += (*v)*(*v); v++;
646 #endif
647       }
648       v = bmat->a;
649       nz = bmat->nz*bs2;
650       for (i=0; i<nz; i++) {
651 #if defined(PETSC_USE_COMPLEX)
652         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
653 #else
654         sum += (*v)*(*v); v++;
655 #endif
656       }
657       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
658       *nrm = sqrt(*nrm);
659     } else if (type == NORM_1) { /* max column sum */
660       PetscReal *tmp,*tmp2;
661       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
662       ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
663       tmp2 = tmp + mat->cmap.N;
664       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
665       v = amat->a; jj = amat->j;
666       for (i=0; i<amat->nz; i++) {
667         for (j=0; j<bs; j++){
668           col = bs*(cstart + *jj) + j; /* column index */
669           for (row=0; row<bs; row++){
670             tmp[col] += PetscAbsScalar(*v);  v++;
671           }
672         }
673         jj++;
674       }
675       v = bmat->a; jj = bmat->j;
676       for (i=0; i<bmat->nz; i++) {
677         for (j=0; j<bs; j++){
678           col = bs*garray[*jj] + j;
679           for (row=0; row<bs; row++){
680             tmp[col] += PetscAbsScalar(*v); v++;
681           }
682         }
683         jj++;
684       }
685       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
686       *nrm = 0.0;
687       for (j=0; j<mat->cmap.N; j++) {
688         if (tmp2[j] > *nrm) *nrm = tmp2[j];
689       }
690       ierr = PetscFree(tmp);CHKERRQ(ierr);
691     } else if (type == NORM_INFINITY) { /* max row sum */
692       PetscReal *sums;
693       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
694       sum = 0.0;
695       for (j=0; j<amat->mbs; j++) {
696         for (row=0; row<bs; row++) sums[row] = 0.0;
697         v = amat->a + bs2*amat->i[j];
698         nz = amat->i[j+1]-amat->i[j];
699         for (i=0; i<nz; i++) {
700           for (col=0; col<bs; col++){
701             for (row=0; row<bs; row++){
702               sums[row] += PetscAbsScalar(*v); v++;
703             }
704           }
705         }
706         v = bmat->a + bs2*bmat->i[j];
707         nz = bmat->i[j+1]-bmat->i[j];
708         for (i=0; i<nz; i++) {
709           for (col=0; col<bs; col++){
710             for (row=0; row<bs; row++){
711               sums[row] += PetscAbsScalar(*v); v++;
712             }
713           }
714         }
715         for (row=0; row<bs; row++){
716           if (sums[row] > sum) sum = sums[row];
717         }
718       }
719       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
720       ierr = PetscFree(sums);CHKERRQ(ierr);
721     } else {
722       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
723     }
724   }
725   PetscFunctionReturn(0);
726 }
727 
728 /*
729   Creates the hash table, and sets the table
730   This table is created only once.
731   If new entries need to be added to the matrix
732   then the hash table has to be destroyed and
733   recreated.
734 */
735 #undef __FUNCT__
736 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
737 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
738 {
739   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
740   Mat            A = baij->A,B=baij->B;
741   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
742   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
743   PetscErrorCode ierr;
744   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
745   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
746   PetscInt       *HT,key;
747   MatScalar      **HD;
748   PetscReal      tmp;
749 #if defined(PETSC_USE_INFO)
750   PetscInt       ct=0,max=0;
751 #endif
752 
753   PetscFunctionBegin;
754   baij->ht_size=(PetscInt)(factor*nz);
755   size = baij->ht_size;
756 
757   if (baij->ht) {
758     PetscFunctionReturn(0);
759   }
760 
761   /* Allocate Memory for Hash Table */
762   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
763   baij->ht = (PetscInt*)(baij->hd + size);
764   HD       = baij->hd;
765   HT       = baij->ht;
766 
767 
768   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);
769 
770 
771   /* Loop Over A */
772   for (i=0; i<a->mbs; i++) {
773     for (j=ai[i]; j<ai[i+1]; j++) {
774       row = i+rstart;
775       col = aj[j]+cstart;
776 
777       key = row*Nbs + col + 1;
778       h1  = HASH(size,key,tmp);
779       for (k=0; k<size; k++){
780         if (!HT[(h1+k)%size]) {
781           HT[(h1+k)%size] = key;
782           HD[(h1+k)%size] = a->a + j*bs2;
783           break;
784 #if defined(PETSC_USE_INFO)
785         } else {
786           ct++;
787 #endif
788         }
789       }
790 #if defined(PETSC_USE_INFO)
791       if (k> max) max = k;
792 #endif
793     }
794   }
795   /* Loop Over B */
796   for (i=0; i<b->mbs; i++) {
797     for (j=bi[i]; j<bi[i+1]; j++) {
798       row = i+rstart;
799       col = garray[bj[j]];
800       key = row*Nbs + col + 1;
801       h1  = HASH(size,key,tmp);
802       for (k=0; k<size; k++){
803         if (!HT[(h1+k)%size]) {
804           HT[(h1+k)%size] = key;
805           HD[(h1+k)%size] = b->a + j*bs2;
806           break;
807 #if defined(PETSC_USE_INFO)
808         } else {
809           ct++;
810 #endif
811         }
812       }
813 #if defined(PETSC_USE_INFO)
814       if (k> max) max = k;
815 #endif
816     }
817   }
818 
819   /* Print Summary */
820 #if defined(PETSC_USE_INFO)
821   for (i=0,j=0; i<size; i++) {
822     if (HT[i]) {j++;}
823   }
824   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
825 #endif
826   PetscFunctionReturn(0);
827 }
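/*
   Note on the probing above: insertion tries slots (h1+k)%size for
   k = 0,1,2,... until an empty slot is found; keys are stored shifted by +1
   (key = row*Nbs + col + 1) so a stored 0 always means "empty".  The lookups
   in MatSetValues_MPIBAIJ_HT() and MatSetValuesBlocked_MPIBAIJ_HT() retrace
   the same slot sequence, so factor should exceed 1 to leave empty slots and
   keep the probe chains short.
*/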
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
831 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
832 {
833   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
834   PetscErrorCode ierr;
835   PetscInt       nstash,reallocs;
836   InsertMode     addv;
837 
838   PetscFunctionBegin;
839   if (baij->donotstash) {
840     PetscFunctionReturn(0);
841   }
842 
843   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
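  /* InsertMode uses distinct values for INSERT_VALUES and ADD_VALUES, so the
     bitwise OR (MPI_BOR) over all ranks equals ADD_VALUES|INSERT_VALUES
     exactly when some ranks inserted while others added */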
844   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
845   if (addv == (ADD_VALUES|INSERT_VALUES)) {
846     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
847   }
848   mat->insertmode = addv; /* in case this processor had no cache */
849 
850   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
851   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
852   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
853   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
854   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
855   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNCT__
860 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
861 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
862 {
863   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
864   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
865   PetscErrorCode ierr;
866   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
867   PetscInt       *row,*col;
868   PetscTruth     r1,r2,r3,other_disassembled;
869   MatScalar      *val;
870   InsertMode     addv = mat->insertmode;
871   PetscMPIInt    n;
872 
873   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
874   PetscFunctionBegin;
875   if (!baij->donotstash) {
876     while (1) {
877       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
878       if (!flg) break;
879 
880       for (i=0; i<n;) {
881         /* Now identify the consecutive vals belonging to the same row */
882         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
883         if (j < n) ncols = j-i;
884         else       ncols = n-i;
885         /* Now assemble all these values with a single function call */
886         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
887         i = j;
888       }
889     }
890     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
891     /* Now process the block-stash. Since the values are stashed column-oriented,
892        set the roworiented flag to column oriented, and after MatSetValues()
893        restore the original flags */
894     r1 = baij->roworiented;
895     r2 = a->roworiented;
896     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
897     baij->roworiented = PETSC_FALSE;
898     a->roworiented    = PETSC_FALSE;
899     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
900     while (1) {
901       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
902       if (!flg) break;
903 
904       for (i=0; i<n;) {
905         /* Now identify the consecutive vals belonging to the same row */
906         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
907         if (j < n) ncols = j-i;
908         else       ncols = n-i;
909         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
910         i = j;
911       }
912     }
913     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
914     baij->roworiented = r1;
915     a->roworiented    = r2;
916     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
917   }
918 
919   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
920   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
921 
922   /* determine if any processor has disassembled, if so we must
923      also disassemble ourselves, in order that we may reassemble. */
924   /*
925      if nonzero structure of submatrix B cannot change then we know that
926      no processor disassembled thus we can skip this stuff
927   */
928   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
929     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
930     if (mat->was_assembled && !other_disassembled) {
931       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
932     }
933   }
934 
935   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
936     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
937   }
938   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
939   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
940   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
941 
942 #if defined(PETSC_USE_INFO)
943   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
944     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
945     baij->ht_total_ct  = 0;
946     baij->ht_insert_ct = 0;
947   }
948 #endif
949   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
950     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
951     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
952     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
953   }
954 
955   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
956   baij->rowvalues = 0;
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
962 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
963 {
964   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
965   PetscErrorCode    ierr;
966   PetscMPIInt       size = baij->size,rank = baij->rank;
967   PetscInt          bs = mat->rmap.bs;
968   PetscTruth        iascii,isdraw;
969   PetscViewer       sviewer;
970   PetscViewerFormat format;
971 
972   PetscFunctionBegin;
973   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
974   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
975   if (iascii) {
976     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
977     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
978       MatInfo info;
979       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
980       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
981       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
982               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
983               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
984       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
985       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
986       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
987       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
988       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
989       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
990       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
991       PetscFunctionReturn(0);
992     } else if (format == PETSC_VIEWER_ASCII_INFO) {
993       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
994       PetscFunctionReturn(0);
995     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
996       PetscFunctionReturn(0);
997     }
998   }
999 
1000   if (isdraw) {
1001     PetscDraw       draw;
1002     PetscTruth isnull;
1003     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1004     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1005   }
1006 
1007   if (size == 1) {
1008     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1009     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1010   } else {
1011     /* assemble the entire matrix onto first processor. */
1012     Mat         A;
1013     Mat_SeqBAIJ *Aloc;
1014     PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1015     MatScalar   *a;
1016 
1017     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1018     /* Perhaps this should be the type of mat? */
1019     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1020     if (!rank) {
1021       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1022     } else {
1023       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1024     }
1025     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1026     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1027     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1028 
1029     /* copy over the A part */
1030     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1031     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1032     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1033 
1034     for (i=0; i<mbs; i++) {
1035       rvals[0] = bs*(baij->rstartbs + i);
1036       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1037       for (j=ai[i]; j<ai[i+1]; j++) {
1038         col = (baij->cstartbs+aj[j])*bs;
1039         for (k=0; k<bs; k++) {
1040           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1041           col++; a += bs;
1042         }
1043       }
1044     }
1045     /* copy over the B part */
1046     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1047     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1048     for (i=0; i<mbs; i++) {
1049       rvals[0] = bs*(baij->rstartbs + i);
1050       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1051       for (j=ai[i]; j<ai[i+1]; j++) {
1052         col = baij->garray[aj[j]]*bs;
1053         for (k=0; k<bs; k++) {
1054           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1055           col++; a += bs;
1056         }
1057       }
1058     }
1059     ierr = PetscFree(rvals);CHKERRQ(ierr);
1060     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1061     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1062     /*
1063        Everyone has to call to draw the matrix since the graphics waits are
1064        synchronized across all processors that share the PetscDraw object
1065     */
1066     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1067     if (!rank) {
1068       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1069       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1070     }
1071     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1072     ierr = MatDestroy(A);CHKERRQ(ierr);
1073   }
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatView_MPIBAIJ"
1079 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1080 {
1081   PetscErrorCode ierr;
1082   PetscTruth     iascii,isdraw,issocket,isbinary;
1083 
1084   PetscFunctionBegin;
1085   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1086   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1087   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1088   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1089   if (iascii || isdraw || issocket || isbinary) {
1090     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1091   } else {
1092     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1093   }
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1099 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1100 {
1101   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105 #if defined(PETSC_USE_LOG)
1106   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
1107 #endif
1108   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1109   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1110   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1111   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1112 #if defined (PETSC_USE_CTABLE)
1113   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1114 #else
1115   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1116 #endif
1117   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1118   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1119   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1120   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1121   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1122   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1123   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1124   ierr = PetscFree(baij);CHKERRQ(ierr);
1125 
1126   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatMult_MPIBAIJ"
1139 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1140 {
1141   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1142   PetscErrorCode ierr;
1143   PetscInt       nt;
1144 
1145   PetscFunctionBegin;
1146   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1147   if (nt != A->cmap.n) {
1148     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1149   }
1150   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1151   if (nt != A->rmap.n) {
1152     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1153   }
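  /* start communicating the ghost entries of xx, overlap the communication
     with the local diagonal-block product, then add the off-diagonal
     contribution B*lvec once the scatter has completed */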
1154   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1155   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1156   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1157   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1163 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1164 {
1165   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1166   PetscErrorCode ierr;
1167 
1168   PetscFunctionBegin;
1169   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1170   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1171   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1172   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1178 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1179 {
1180   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1181   PetscErrorCode ierr;
1182   PetscTruth     merged;
1183 
1184   PetscFunctionBegin;
1185   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1186   /* do nondiagonal part */
1187   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1188   if (!merged) {
1189     /* send it on its way */
1190     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1191     /* do local part */
1192     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1193     /* receive remote parts: note this assumes the values are not actually */
1194     /* inserted in yy until the next line */
1195     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1196   } else {
1197     /* do local part */
1198     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1199     /* send it on its way */
1200     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1201     /* values actually were received in the Begin() but we need to call this nop */
1202     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1209 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1210 {
1211   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1212   PetscErrorCode ierr;
1213 
1214   PetscFunctionBegin;
1215   /* do nondiagonal part */
1216   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1217   /* send it on its way */
1218   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219   /* do local part */
1220   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1221   /* receive remote parts: note this assumes the values are not actually */
1222   /* inserted in zz until the next line, which is true for my implementation */
1223   /* but is not perhaps always true. */
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1234 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1235 {
1236   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1241   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatScale_MPIBAIJ"
1247 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1248 {
1249   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1254   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1260 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1263   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264   PetscErrorCode ierr;
1265   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1266   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1267   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1268 
1269   PetscFunctionBegin;
1270   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271   mat->getrowactive = PETSC_TRUE;
1272 
1273   if (!mat->rowvalues && (idx || v)) {
1274     /*
1275         allocate enough space to hold information from the longest row.
1276     */
1277     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1278     PetscInt     max = 1,mbs = mat->mbs,tmp;
1279     for (i=0; i<mbs; i++) {
1280       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281       if (max < tmp) { max = tmp; }
1282     }
1283     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1284     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1285   }
1286 
1287   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1288   lrow = row - brstart;
1289 
1290   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291   if (!v)   {pvA = 0; pvB = 0;}
1292   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295   nztot = nzA + nzB;
1296 
1297   cmap  = mat->garray;
1298   if (v  || idx) {
1299     if (nztot) {
1300       /* Sort by increasing column numbers, assuming A and B already sorted */
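      /* imark marks where B's (sorted) columns cross from global indices to
         the left of the diagonal block to those on its right; the merged row
         is emitted as B-left, then all of A, then B-right */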
1301       PetscInt imark = -1;
1302       if (v) {
1303         *v = v_p = mat->rowvalues;
1304         for (i=0; i<nzB; i++) {
1305           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1306           else break;
1307         }
1308         imark = i;
1309         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311       }
1312       if (idx) {
1313         *idx = idx_p = mat->rowindices;
1314         if (imark > -1) {
1315           for (i=0; i<imark; i++) {
1316             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1317           }
1318         } else {
1319           for (i=0; i<nzB; i++) {
1320             if (cmap[cworkB[i]/bs] < cstart)
1321               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1322             else break;
1323           }
1324           imark = i;
1325         }
1326         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1327         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1328       }
1329     } else {
1330       if (idx) *idx = 0;
1331       if (v)   *v   = 0;
1332     }
1333   }
1334   *nz = nztot;
1335   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1336   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1342 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1343 {
1344   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1345 
1346   PetscFunctionBegin;
1347   if (!baij->getrowactive) {
1348     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1349   }
1350   baij->getrowactive = PETSC_FALSE;
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1356 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1357 {
1358   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1363   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1369 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1370 {
1371   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1372   Mat            A = a->A,B = a->B;
1373   PetscErrorCode ierr;
1374   PetscReal      isend[5],irecv[5];
1375 
1376   PetscFunctionBegin;
1377   info->block_size     = (PetscReal)matin->rmap.bs;
1378   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1379   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1380   isend[3] = info->memory;  isend[4] = info->mallocs;
1381   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1382   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1383   isend[3] += info->memory;  isend[4] += info->mallocs;
1384   if (flag == MAT_LOCAL) {
1385     info->nz_used      = isend[0];
1386     info->nz_allocated = isend[1];
1387     info->nz_unneeded  = isend[2];
1388     info->memory       = isend[3];
1389     info->mallocs      = isend[4];
1390   } else if (flag == MAT_GLOBAL_MAX) {
1391     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1392     info->nz_used      = irecv[0];
1393     info->nz_allocated = irecv[1];
1394     info->nz_unneeded  = irecv[2];
1395     info->memory       = irecv[3];
1396     info->mallocs      = irecv[4];
1397   } else if (flag == MAT_GLOBAL_SUM) {
1398     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1399     info->nz_used      = irecv[0];
1400     info->nz_allocated = irecv[1];
1401     info->nz_unneeded  = irecv[2];
1402     info->memory       = irecv[3];
1403     info->mallocs      = irecv[4];
1404   } else {
1405     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1406   }
1407   info->rows_global       = (PetscReal)matin->rmap.N;
1408   info->columns_global    = (PetscReal)matin->cmap.N;
1409   info->rows_local        = (PetscReal)matin->rmap.n;
1410   info->columns_local     = (PetscReal)matin->cmap.n;
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1419 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1420 {
1421   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1422   PetscErrorCode ierr;
1423 
1424   PetscFunctionBegin;
1425   switch (op) {
1426   case MAT_NEW_NONZERO_LOCATIONS:
1427   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1428   case MAT_KEEP_ZEROED_ROWS:
1429   case MAT_NEW_NONZERO_LOCATION_ERR:
1430     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1431     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1432     break;
1433   case MAT_ROW_ORIENTED:
1434     a->roworiented = flg;
1435     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1436     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1437     break;
1438   case MAT_NEW_DIAGONALS:
1439     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1440     break;
1441   case MAT_IGNORE_OFF_PROC_ENTRIES:
1442     a->donotstash = flg;
1443     break;
1444   case MAT_USE_HASH_TABLE:
1445     a->ht_flag = flg;
1446     break;
1447   case MAT_SYMMETRIC:
1448   case MAT_STRUCTURALLY_SYMMETRIC:
1449   case MAT_HERMITIAN:
1450   case MAT_SYMMETRY_ETERNAL:
1451     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1452     break;
1453   default:
1454     SETERRQ1(PETSC_ERR_SUP,"Unknown option %d",op);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1461 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1462 {
1463   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1464   Mat_SeqBAIJ    *Aloc;
1465   Mat            B;
1466   PetscErrorCode ierr;
1467   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1468   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1469   MatScalar      *a;
1470 
1471   PetscFunctionBegin;
1472   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1473   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1474   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1475   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1476   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1477 
1478   /* copy over the A part */
1479   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1480   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1481   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1482 
1483   for (i=0; i<mbs; i++) {
1484     rvals[0] = bs*(baij->rstartbs + i);
1485     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1486     for (j=ai[i]; j<ai[i+1]; j++) {
1487       col = (baij->cstartbs+aj[j])*bs;
1488       for (k=0; k<bs; k++) {
1489         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1490         col++; a += bs;
1491       }
1492     }
1493   }
1494   /* copy over the B part */
1495   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1496   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1497   for (i=0; i<mbs; i++) {
1498     rvals[0] = bs*(baij->rstartbs + i);
1499     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1500     for (j=ai[i]; j<ai[i+1]; j++) {
1501       col = baij->garray[aj[j]]*bs;
1502       for (k=0; k<bs; k++) {
1503         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1504         col++; a += bs;
1505       }
1506     }
1507   }
1508   ierr = PetscFree(rvals);CHKERRQ(ierr);
1509   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1510   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1511 
1512   if (matout) {
1513     *matout = B;
1514   } else {
1515     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1516   }
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1522 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1523 {
1524   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1525   Mat            a = baij->A,b = baij->B;
1526   PetscErrorCode ierr;
1527   PetscInt       s1,s2,s3;
1528 
1529   PetscFunctionBegin;
1530   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1531   if (rr) {
1532     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1533     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1534     /* Overlap communication with computation. */
1535     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536   }
1537   if (ll) {
1538     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1539     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1540     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1541   }
1542   /* scale the diagonal block */
1543   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1544 
1545   if (rr) {
1546     /* Do a scatter end and then right scale the off-diagonal block */
1547     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1548     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1549   }
1550 
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1556 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1557 {
1558   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1559   PetscErrorCode ierr;
1560   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1561   PetscInt       i,*owners = A->rmap.range;
1562   PetscInt       *nprocs,j,idx,nsends,row;
1563   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1564   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1565   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1566   MPI_Comm       comm = ((PetscObject)A)->comm;
1567   MPI_Request    *send_waits,*recv_waits;
1568   MPI_Status     recv_status,*send_status;
1569 #if defined(PETSC_DEBUG)
1570   PetscTruth     found = PETSC_FALSE;
1571 #endif
1572 
1573   PetscFunctionBegin;
1574   /*  first count number of contributors to each processor */
1575   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1576   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1577   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1578   j = 0;
1579   for (i=0; i<N; i++) {
1580     if (lastidx > (idx = rows[i])) j = 0;
1581     lastidx = idx;
1582     for (; j<size; j++) {
1583       if (idx >= owners[j] && idx < owners[j+1]) {
1584         nprocs[2*j]++;
1585         nprocs[2*j+1] = 1;
1586         owner[i] = j;
1587 #if defined(PETSC_DEBUG)
1588         found = PETSC_TRUE;
1589 #endif
1590         break;
1591       }
1592     }
1593 #if defined(PETSC_DEBUG)
1594     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1595     found = PETSC_FALSE;
1596 #endif
1597   }
1598   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1599 
1600   /* inform other processors of number of messages and max length*/
1601   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1602 
1603   /* post receives:   */
1604   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1605   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1606   for (i=0; i<nrecvs; i++) {
1607     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1608   }
1609 
1610   /* do sends:
1611      1) starts[i] gives the starting index in svalues for stuff going to
1612      the ith processor
1613   */
1614   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1615   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1616   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1617   starts[0]  = 0;
1618   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1619   for (i=0; i<N; i++) {
1620     svalues[starts[owner[i]]++] = rows[i];
1621   }
1622 
1623   starts[0] = 0;
1624   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1625   count = 0;
1626   for (i=0; i<size; i++) {
1627     if (nprocs[2*i+1]) {
1628       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1629     }
1630   }
1631   ierr = PetscFree(starts);CHKERRQ(ierr);
1632 
1633   base = owners[rank];
1634 
1635   /*  wait on receives */
1636   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1637   source = lens + nrecvs;
1638   count  = nrecvs; slen = 0;
1639   while (count) {
1640     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1641     /* unpack receives into our local space */
1642     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1643     source[imdex]  = recv_status.MPI_SOURCE;
1644     lens[imdex]    = n;
1645     slen          += n;
1646     count--;
1647   }
1648   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1649 
1650   /* move the data into the send scatter */
1651   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1652   count = 0;
1653   for (i=0; i<nrecvs; i++) {
1654     values = rvalues + i*nmax;
1655     for (j=0; j<lens[i]; j++) {
1656       lrows[count++] = values[j] - base;
1657     }
1658   }
1659   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1660   ierr = PetscFree(lens);CHKERRQ(ierr);
1661   ierr = PetscFree(owner);CHKERRQ(ierr);
1662   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1663 
1664   /* actually zap the local rows */
1665   /*
1666         Zero the required rows. If the "diagonal block" of the matrix
1667      is square and the user wishes to set the diagonal we use separate
1668      code so that MatSetValues() is not called for each diagonal entry,
1669      which would allocate new memory each time and slow things down.
1670 
1671        Contributed by: Matthew Knepley
1672   */
1673   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1674   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1675   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1676     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1677   } else if (diag != 0.0) {
1678     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1679     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1680       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1681 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1682     }
1683     for (i=0; i<slen; i++) {
1684       row  = lrows[i] + rstart_bs;
1685       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1686     }
1687     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1689   } else {
1690     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1691   }
1692 
1693   ierr = PetscFree(lrows);CHKERRQ(ierr);
1694 
1695   /* wait on sends */
1696   if (nsends) {
1697     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1698     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1699     ierr = PetscFree(send_status);CHKERRQ(ierr);
1700   }
1701   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1702   ierr = PetscFree(svalues);CHKERRQ(ierr);
1703 
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1709 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1710 {
1711   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1720 
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatEqual_MPIBAIJ"
1723 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1724 {
1725   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1726   Mat            a,b,c,d;
1727   PetscTruth     flg;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   a = matA->A; b = matA->B;
1732   c = matB->A; d = matB->B;
1733 
1734   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1735   if (flg) {
1736     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1737   }
1738   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatCopy_MPIBAIJ"
1744 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1745 {
1746   PetscErrorCode ierr;
1747   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1748   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1749 
1750   PetscFunctionBegin;
1751   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1752   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1753     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1754   } else {
1755     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1756     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1757   }
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1763 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1764 {
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 #include "petscblaslapack.h"
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1775 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1776 {
1777   PetscErrorCode ierr;
1778   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1779   PetscBLASInt   bnz,one=1;
1780   Mat_SeqBAIJ    *x,*y;
1781 
1782   PetscFunctionBegin;
1783   if (str == SAME_NONZERO_PATTERN) {
1784     PetscScalar alpha = a;
1785     x = (Mat_SeqBAIJ *)xx->A->data;
1786     y = (Mat_SeqBAIJ *)yy->A->data;
1787     bnz = PetscBLASIntCast(x->nz);
1788     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1789     x = (Mat_SeqBAIJ *)xx->B->data;
1790     y = (Mat_SeqBAIJ *)yy->B->data;
1791     bnz = PetscBLASIntCast(x->nz);
1792     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1793   } else {
1794     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1795   }
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1801 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1802 {
1803   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1804   PetscErrorCode ierr;
1805 
1806   PetscFunctionBegin;
1807   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1808   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1814 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1815 {
1816   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1821   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 /* -------------------------------------------------------------------*/
1826 static struct _MatOps MatOps_Values = {
1827        MatSetValues_MPIBAIJ,
1828        MatGetRow_MPIBAIJ,
1829        MatRestoreRow_MPIBAIJ,
1830        MatMult_MPIBAIJ,
1831 /* 4*/ MatMultAdd_MPIBAIJ,
1832        MatMultTranspose_MPIBAIJ,
1833        MatMultTransposeAdd_MPIBAIJ,
1834        0,
1835        0,
1836        0,
1837 /*10*/ 0,
1838        0,
1839        0,
1840        0,
1841        MatTranspose_MPIBAIJ,
1842 /*15*/ MatGetInfo_MPIBAIJ,
1843        MatEqual_MPIBAIJ,
1844        MatGetDiagonal_MPIBAIJ,
1845        MatDiagonalScale_MPIBAIJ,
1846        MatNorm_MPIBAIJ,
1847 /*20*/ MatAssemblyBegin_MPIBAIJ,
1848        MatAssemblyEnd_MPIBAIJ,
1849        0,
1850        MatSetOption_MPIBAIJ,
1851        MatZeroEntries_MPIBAIJ,
1852 /*25*/ MatZeroRows_MPIBAIJ,
1853        0,
1854        0,
1855        0,
1856        0,
1857 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1858        0,
1859        0,
1860        0,
1861        0,
1862 /*35*/ MatDuplicate_MPIBAIJ,
1863        0,
1864        0,
1865        0,
1866        0,
1867 /*40*/ MatAXPY_MPIBAIJ,
1868        MatGetSubMatrices_MPIBAIJ,
1869        MatIncreaseOverlap_MPIBAIJ,
1870        MatGetValues_MPIBAIJ,
1871        MatCopy_MPIBAIJ,
1872 /*45*/ 0,
1873        MatScale_MPIBAIJ,
1874        0,
1875        0,
1876        0,
1877 /*50*/ 0,
1878        0,
1879        0,
1880        0,
1881        0,
1882 /*55*/ 0,
1883        0,
1884        MatSetUnfactored_MPIBAIJ,
1885        0,
1886        MatSetValuesBlocked_MPIBAIJ,
1887 /*60*/ 0,
1888        MatDestroy_MPIBAIJ,
1889        MatView_MPIBAIJ,
1890        0,
1891        0,
1892 /*65*/ 0,
1893        0,
1894        0,
1895        0,
1896        0,
1897 /*70*/ MatGetRowMaxAbs_MPIBAIJ,
1898        0,
1899        0,
1900        0,
1901        0,
1902 /*75*/ 0,
1903        0,
1904        0,
1905        0,
1906        0,
1907 /*80*/ 0,
1908        0,
1909        0,
1910        0,
1911        MatLoad_MPIBAIJ,
1912 /*85*/ 0,
1913        0,
1914        0,
1915        0,
1916        0,
1917 /*90*/ 0,
1918        0,
1919        0,
1920        0,
1921        0,
1922 /*95*/ 0,
1923        0,
1924        0,
1925        0,
1926        0,
1927 /*100*/0,
1928        0,
1929        0,
1930        0,
1931        0,
1932 /*105*/0,
1933        MatRealPart_MPIBAIJ,
1934        MatImaginaryPart_MPIBAIJ};
1935 
1936 
1937 EXTERN_C_BEGIN
1938 #undef __FUNCT__
1939 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1940 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1941 {
1942   PetscFunctionBegin;
1943   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1944   *iscopy = PETSC_FALSE;
1945   PetscFunctionReturn(0);
1946 }
1947 EXTERN_C_END
1948 
1949 EXTERN_C_BEGIN
1950 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
1951 EXTERN_C_END
1952 
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
1955 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
1956 {
1957   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
1958   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
1959   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
1960   const PetscInt *JJ;
1961   PetscScalar    *values;
1962   PetscErrorCode ierr;
1963 
1964   PetscFunctionBegin;
1965 #if defined(PETSC_OPT_g)
1966   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0, it is %D",Ii[0]);
1967 #endif
1968   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1969   o_nnz = d_nnz + m;
1970 
1971   for (i=0; i<m; i++) {
1972     nnz     = Ii[i+1]- Ii[i];
1973     JJ      = J + Ii[i];
1974     nnz_max = PetscMax(nnz_max,nnz);
1975 #if defined(PETSC_OPT_g)
1976     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
1977 #endif
1978     for (j=0; j<nnz; j++) {
1979       if (*JJ >= cstart) break;
1980       JJ++;
1981     }
1982     d = 0;
1983     for (; j<nnz; j++) {
1984       if (*JJ++ >= cend) break;
1985       d++;
1986     }
1987     d_nnz[i] = d;
1988     o_nnz[i] = nnz - d;
1989   }
1990   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1991   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1992 
1993   if (v) values = (PetscScalar*)v;
1994   else {
1995     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1996     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1997   }
1998 
1999   for (i=0; i<m; i++) {
2000     ii   = i + rstart;
2001     nnz  = Ii[i+1]- Ii[i];
2002     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? bs*bs*Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); /* each block holds bs*bs scalars */
2003   }
2004   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2005   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2006 
2007   if (!v) {
2008     ierr = PetscFree(values);CHKERRQ(ierr);
2009   }
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2015 /*@C
2016    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ
2017    format (block compressed row), given the matrix structure in block CSR form.
2018 
2019    Collective on Mat
2020 
2021    Input Parameters:
2022 +  B - the matrix
.  bs - the block size
2023 .  i - the indices into j for the start of each local block row (starting from zero)
2024 .  j - the block column indices for each local block row (starting from zero); these must be sorted within each row
2025 -  v - optional values in the matrix
2026 
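   Example usage, a minimal sketch for a process owning two block rows with bs = 2
   (B is assumed to be an MPIBAIJ matrix; the index and value data are illustrative):
.vb
   PetscInt    bs   = 2;
   PetscInt    ii[] = {0,2,3};    /* block row r owns entries ii[r]..ii[r+1]-1 of jj */
   PetscInt    jj[] = {0,1,1};    /* block column indices, sorted within each row */
   PetscScalar vv[3*2*2] = {0};   /* bs*bs scalars per block; may also pass PETSC_NULL */
   ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);CHKERRQ(ierr);
.ve
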
2027    Level: developer
2028 
2029 .keywords: matrix, aij, compressed row, sparse, parallel
2030 
2031 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2032 @*/
2033 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2034 {
2035   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2036 
2037   PetscFunctionBegin;
2038   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2039   if (f) {
2040     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2041   }
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 EXTERN_C_BEGIN
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2048 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2049 {
2050   Mat_MPIBAIJ    *b;
2051   PetscErrorCode ierr;
2052   PetscInt       i;
2053 
2054   PetscFunctionBegin;
2055   B->preallocated = PETSC_TRUE;
2056   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2057     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2058   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2059 
2060   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2061   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2062   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2063   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2064   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2065 
2066   B->rmap.bs  = bs;
2067   B->cmap.bs  = bs;
2068   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2069   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2070 
2071   if (d_nnz) {
2072     for (i=0; i<B->rmap.n/bs; i++) {
2073       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2074     }
2075   }
2076   if (o_nnz) {
2077     for (i=0; i<B->rmap.n/bs; i++) {
2078       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2079     }
2080   }
2081 
2082   b = (Mat_MPIBAIJ*)B->data;
2083   b->bs2 = bs*bs;
2084   b->mbs = B->rmap.n/bs;
2085   b->nbs = B->cmap.n/bs;
2086   b->Mbs = B->rmap.N/bs;
2087   b->Nbs = B->cmap.N/bs;
2088 
2089   for (i=0; i<=b->size; i++) {
2090     b->rangebs[i] = B->rmap.range[i]/bs;
2091   }
2092   b->rstartbs = B->rmap.rstart/bs;
2093   b->rendbs   = B->rmap.rend/bs;
2094   b->cstartbs = B->cmap.rstart/bs;
2095   b->cendbs   = B->cmap.rend/bs;
2096 
2097   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2098   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2099   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2100   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2101   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2102   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2103   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2104   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2105   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2106   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2107 
2108   ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2109 
2110   PetscFunctionReturn(0);
2111 }
2112 EXTERN_C_END
2113 
2114 EXTERN_C_BEGIN
2115 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2116 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2117 EXTERN_C_END
2118 
2119 /*MC
2120    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2121 
2122    Options Database Keys:
2123 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2124 . -mat_block_size <bs> - set the blocksize used to store the matrix
2125 - -mat_use_hash_table <fact> - use a hash table during assembly, with table size factor <fact> (see MatMPIBAIJSetHashTableFactor())
2126 
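   For example, a minimal sketch of selecting this format at run time (comm, A and
   the sizes m,n,M,N are assumed to be provided by the caller):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);  /* run with -mat_type mpibaij -mat_block_size 3 */
.ve
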
2127   Level: beginner
2128 
2129 .seealso: MatCreateMPIBAIJ
2130 M*/
2131 
2132 EXTERN_C_BEGIN
2133 #undef __FUNCT__
2134 #define __FUNCT__ "MatCreate_MPIBAIJ"
2135 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2136 {
2137   Mat_MPIBAIJ    *b;
2138   PetscErrorCode ierr;
2139   PetscTruth     flg;
2140 
2141   PetscFunctionBegin;
2142   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2143   B->data = (void*)b;
2144 
2145 
2146   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2147   B->mapping    = 0;
2148   B->factor     = 0;
2149   B->assembled  = PETSC_FALSE;
2150 
2151   B->insertmode = NOT_SET_VALUES;
2152   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2153   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2154 
2155   /* build local table of row and column ownerships */
2156   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2157 
2158   /* build cache for off array entries formed */
2159   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2160   b->donotstash  = PETSC_FALSE;
2161   b->colmap      = PETSC_NULL;
2162   b->garray      = PETSC_NULL;
2163   b->roworiented = PETSC_TRUE;
2164 
2165   /* stuff used in block assembly */
2166   b->barray       = 0;
2167 
2168   /* stuff used for matrix vector multiply */
2169   b->lvec         = 0;
2170   b->Mvctx        = 0;
2171 
2172   /* stuff for MatGetRow() */
2173   b->rowindices   = 0;
2174   b->rowvalues    = 0;
2175   b->getrowactive = PETSC_FALSE;
2176 
2177   /* hash table stuff */
2178   b->ht           = 0;
2179   b->hd           = 0;
2180   b->ht_size      = 0;
2181   b->ht_flag      = PETSC_FALSE;
2182   b->ht_fact      = 0;
2183   b->ht_total_ct  = 0;
2184   b->ht_insert_ct = 0;
2185 
2186   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2187     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2188     if (flg) {
2189       PetscReal fact = 1.39;
2190       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2191       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2192       if (fact <= 1.0) fact = 1.39;
2193       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2194       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2195     }
2196   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2197 
2198   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2199                                      "MatStoreValues_MPIBAIJ",
2200                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2201   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2202                                      "MatRetrieveValues_MPIBAIJ",
2203                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2204   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2205                                      "MatGetDiagonalBlock_MPIBAIJ",
2206                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2207   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2208                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2209                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2210   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2211 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2212 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2213   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2214                                      "MatDiagonalScaleLocal_MPIBAIJ",
2215                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2216   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2217                                      "MatSetHashTableFactor_MPIBAIJ",
2218                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2219   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2220   PetscFunctionReturn(0);
2221 }
2222 EXTERN_C_END
2223 
2224 /*MC
2225    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2226 
2227    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2228    and MATMPIBAIJ otherwise.
2229 
2230    Options Database Keys:
2231 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2232 
2233   Level: beginner
2234 
2235 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2236 M*/
2237 
2238 EXTERN_C_BEGIN
2239 #undef __FUNCT__
2240 #define __FUNCT__ "MatCreate_BAIJ"
2241 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2242 {
2243   PetscErrorCode ierr;
2244   PetscMPIInt    size;
2245 
2246   PetscFunctionBegin;
2247   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2248   if (size == 1) {
2249     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2252   }
2253   PetscFunctionReturn(0);
2254 }
2255 EXTERN_C_END
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2259 /*@C
2260    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2261    (block compressed row).  For good matrix assembly performance
2262    the user should preallocate the matrix storage by setting the parameters
2263    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2264    performance can be increased by more than a factor of 50.
2265 
2266    Collective on Mat
2267 
2268    Input Parameters:
2269 +  A - the matrix
2270 .  bs   - size of block
2271 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2272            submatrix  (same for all local rows)
2273 .  d_nnz - array containing the number of block nonzeros in the various block rows
2274            in the diagonal portion of the local submatrix (possibly different for each block
2275            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2276 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2277            submatrix (same for all local rows).
2278 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2279            off-diagonal portion of the local submatrix (possibly different for
2280            each block row) or PETSC_NULL.
2281 
2282    If the *_nnz parameter is given then the *_nz parameter is ignored
2283 
2284    Options Database Keys:
2285 +   -mat_block_size - size of the blocks to use
2286 -   -mat_use_hash_table <fact> - use a hash table during assembly, with table size factor <fact>
2287 
2288    Notes:
2289    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2290    then it must be used on all processors that share the object for that argument.
2291 
2292    Storage Information:
2293    For a square global matrix we define each processor's diagonal portion
2294    to be its local rows and the corresponding columns (a square submatrix);
2295    each processor's off-diagonal portion encompasses the remainder of the
2296    local matrix (a rectangular submatrix).
2297 
2298    The user can specify preallocated storage for the diagonal part of
2299    the local submatrix with either d_nz or d_nnz (not both).  Set
2300    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2301    memory allocation.  Likewise, specify preallocated storage for the
2302    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2303 
2304    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2305    the figure below we depict these three local rows and all columns (0-11).
2306 
2307 .vb
2308            0 1 2 3 4 5 6 7 8 9 10 11
2309           -------------------
2310    row 3  |  o o o d d d o o o o o o
2311    row 4  |  o o o d d d o o o o o o
2312    row 5  |  o o o d d d o o o o o o
2313           -------------------
2314 .ve
2315 
2316    Thus, any entries in the d locations are stored in the d (diagonal)
2317    submatrix, and any entries in the o locations are stored in the
2318    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2319    stored simply in the MATSEQBAIJ format for compressed row storage.
2320 
2321    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2322    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2323    In general, for PDE problems in which most nonzeros are near the diagonal,
2324    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2325    or you will get TERRIBLE performance; see the users' manual chapter on
2326    matrices.
2327 
2328    You can call MatGetInfo() to get information on how effective the preallocation was,
2329    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2330    You can also run with the option -info and look for messages containing the string
2331    malloc to see if additional memory allocation was needed.
2332 
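   Example usage, a minimal sketch for a process owning two block rows (A is assumed
   to already be an MPIBAIJ matrix; the block size and per-row counts are illustrative):
.vb
   PetscInt bs = 3;
   PetscInt d_nnz[] = {2,1};   /* block nonzeros per block row, diagonal part */
   PetscInt o_nnz[] = {1,0};   /* block nonzeros per block row, off-diagonal part */
   ierr = MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   If the same estimate suits every block row, pass d_nz/o_nz and PETSC_NULL instead,
   e.g. MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL).
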
2333    Level: intermediate
2334 
2335 .keywords: matrix, block, aij, compressed row, sparse, parallel
2336 
2337 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2338 @*/
2339 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2340 {
2341   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2342 
2343   PetscFunctionBegin;
2344   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2345   if (f) {
2346     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatCreateMPIBAIJ"
2353 /*@C
2354    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2355    (block compressed row).  For good matrix assembly performance
2356    the user should preallocate the matrix storage by setting the parameters
2357    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2358    performance can be increased by more than a factor of 50.
2359 
2360    Collective on MPI_Comm
2361 
2362    Input Parameters:
2363 +  comm - MPI communicator
2364 .  bs   - size of block
2365 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2366            This value should be the same as the local size used in creating the
2367            y vector for the matrix-vector product y = Ax.
2368 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2369            This value should be the same as the local size used in creating the
2370            x vector for the matrix-vector product y = Ax.
2371 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2372 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2373 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2374            submatrix  (same for all local rows)
2375 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2376            in the diagonal portion of the local submatrix (possibly different for each block
2377            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2378 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2379            submatrix (same for all local rows).
2380 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2381            off-diagonal portion of the local submatrix (possibly different for
2382            each block row) or PETSC_NULL.
2383 
2384    Output Parameter:
2385 .  A - the matrix
2386 
2387    Options Database Keys:
2388 +   -mat_block_size - size of the blocks to use
2389 -   -mat_use_hash_table <fact> - use a hash table during assembly, with table size factor <fact>
2390 
2391    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2392    MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
2393    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2394    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
2395 
2396    Notes:
2397    If the *_nnz parameter is given then the *_nz parameter is ignored
2398 
2399    A nonzero block is any block that has 1 or more nonzeros in it
2400 
2401    The user MUST specify either the local or global matrix dimensions
2402    (possibly both).
2403 
2404    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2405    then it must be used on all processors that share the object for that argument.
2406 
2407    Storage Information:
2408    For a square global matrix we define each processor's diagonal portion
2409    to be its local rows and the corresponding columns (a square submatrix);
2410    each processor's off-diagonal portion encompasses the remainder of the
2411    local matrix (a rectangular submatrix).
2412 
2413    The user can specify preallocated storage for the diagonal part of
2414    the local submatrix with either d_nz or d_nnz (not both).  Set
2415    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2416    memory allocation.  Likewise, specify preallocated storage for the
2417    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2418 
2419    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2420    the figure below we depict these three local rows and all columns (0-11).
2421 
2422 .vb
2423            0 1 2 3 4 5 6 7 8 9 10 11
2424           -------------------
2425    row 3  |  o o o d d d o o o o o o
2426    row 4  |  o o o d d d o o o o o o
2427    row 5  |  o o o d d d o o o o o o
2428           -------------------
2429 .ve
2430 
2431    Thus, any entries in the d locations are stored in the d (diagonal)
2432    submatrix, and any entries in the o locations are stored in the
2433    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2434    stored simply in the MATSEQBAIJ format for compressed row storage.
2435 
2436    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2437    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2438    In general, for PDE problems in which most nonzeros are near the diagonal,
2439    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2440    or you will get TERRIBLE performance; see the users' manual chapter on
2441    matrices.
2442 
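   Example usage, a minimal sketch (the global size 100, bs = 2, and the nonzero
   estimates are illustrative):
.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                           5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
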
2443    Level: intermediate
2444 
2445 .keywords: matrix, block, aij, compressed row, sparse, parallel
2446 
2447 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2448 @*/
2449 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2450 {
2451   PetscErrorCode ierr;
2452   PetscMPIInt    size;
2453 
2454   PetscFunctionBegin;
2455   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2456   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2457   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2458   if (size > 1) {
2459     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2460     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2461   } else {
2462     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2463     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2464   }
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2470 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2471 {
2472   Mat            mat;
2473   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2474   PetscErrorCode ierr;
2475   PetscInt       len=0;
2476 
2477   PetscFunctionBegin;
2478   *newmat       = 0;
2479   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2480   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2481   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2482   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2483 
2484   mat->factor       = matin->factor;
2485   mat->preallocated = PETSC_TRUE;
2486   mat->assembled    = PETSC_TRUE;
2487   mat->insertmode   = NOT_SET_VALUES;
2488 
2489   a      = (Mat_MPIBAIJ*)mat->data;
2490   mat->rmap.bs  = matin->rmap.bs;
2491   a->bs2   = oldmat->bs2;
2492   a->mbs   = oldmat->mbs;
2493   a->nbs   = oldmat->nbs;
2494   a->Mbs   = oldmat->Mbs;
2495   a->Nbs   = oldmat->Nbs;
2496 
2497   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2498   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2499 
2500   a->size         = oldmat->size;
2501   a->rank         = oldmat->rank;
2502   a->donotstash   = oldmat->donotstash;
2503   a->roworiented  = oldmat->roworiented;
2504   a->rowindices   = 0;
2505   a->rowvalues    = 0;
2506   a->getrowactive = PETSC_FALSE;
2507   a->barray       = 0;
2508   a->rstartbs     = oldmat->rstartbs;
2509   a->rendbs       = oldmat->rendbs;
2510   a->cstartbs     = oldmat->cstartbs;
2511   a->cendbs       = oldmat->cendbs;
2512 
2513   /* hash table stuff */
2514   a->ht           = 0;
2515   a->hd           = 0;
2516   a->ht_size      = 0;
2517   a->ht_flag      = oldmat->ht_flag;
2518   a->ht_fact      = oldmat->ht_fact;
2519   a->ht_total_ct  = 0;
2520   a->ht_insert_ct = 0;
2521 
2522   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2523   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2524   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2525   if (oldmat->colmap) {
2526 #if defined (PETSC_USE_CTABLE)
2527   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2528 #else
2529   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2530   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2531   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2532 #endif
2533   } else a->colmap = 0;
2534 
2535   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2536     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2537     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2538     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2539   } else a->garray = 0;
2540 
2541   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2542   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2543   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2544   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2545 
2546   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2547   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2548   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2549   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2550   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2551   *newmat = mat;
2552 
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 #include "petscsys.h"
2557 
2558 #undef __FUNCT__
2559 #define __FUNCT__ "MatLoad_MPIBAIJ"
2560 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2561 {
2562   Mat            A;
2563   PetscErrorCode ierr;
2564   int            fd;
2565   PetscInt       i,nz,j,rstart,rend;
2566   PetscScalar    *vals,*buf;
2567   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2568   MPI_Status     status;
2569   PetscMPIInt    rank,size,maxnz;
2570   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2571   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2572   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2573   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2574   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2575   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2576 
2577   PetscFunctionBegin;
2578   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
2579     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2580   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2581 
2582   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2583   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2584   if (!rank) {
2585     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2586     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2587     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2588   }
2589 
2590   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2591   M = header[1]; N = header[2];
2592 
2593   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2594 
2595   /*
2596      This code adds extra rows to make sure the number of rows is
2597      divisible by the blocksize
2598   */
2599   Mbs        = M/bs;
2600   extra_rows = bs - M + bs*Mbs;
2601   if (extra_rows == bs) extra_rows = 0;
2602   else                  Mbs++;
2603   if (extra_rows && !rank) {
2604     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2605   }
2606 
2607   /* determine ownership of all rows */
2608   mbs        = Mbs/size + ((Mbs % size) > rank);
2609   m          = mbs*bs;
2610   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2611   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2612 
2613   /* process 0 needs enough room for the process with the most rows */
2614   if (!rank) {
2615     mmax = rowners[1];
2616     for (i=2; i<size; i++) {
2617       mmax = PetscMax(mmax,rowners[i]);
2618     }
2619     mmax*=bs;
2620   } else mmax = m;
2621 
2622   rowners[0] = 0;
2623   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2624   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2625   rstart = rowners[rank];
2626   rend   = rowners[rank+1];
2627 
2628   /* distribute row lengths to all processors */
2629   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2630   if (!rank) {
2631     mend = m;
2632     if (size == 1) mend = mend - extra_rows;
2633     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2634     for (j=mend; j<m; j++) locrowlens[j] = 1;
2635     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2636     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2637     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2638     for (j=0; j<m; j++) {
2639       procsnz[0] += locrowlens[j];
2640     }
2641     for (i=1; i<size; i++) {
2642       mend = browners[i+1] - browners[i];
2643       if (i == size-1) mend = mend - extra_rows;
2644       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2645       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2646       /* calculate the number of nonzeros on each processor */
2647       for (j=0; j<browners[i+1]-browners[i]; j++) {
2648         procsnz[i] += rowlengths[j];
2649       }
2650       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2651     }
2652     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2653   } else {
2654     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2655   }
2656 
2657   if (!rank) {
2658     /* determine max buffer needed and allocate it */
2659     maxnz = procsnz[0];
2660     for (i=1; i<size; i++) {
2661       maxnz = PetscMax(maxnz,procsnz[i]);
2662     }
2663     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2664 
2665     /* read in my part of the matrix column indices  */
2666     nz     = procsnz[0];
2667     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2668     mycols = ibuf;
2669     if (size == 1)  nz -= extra_rows;
2670     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2671     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2672 
2673     /* read in everyone else's (except the last proc's) and ship off */
2674     for (i=1; i<size-1; i++) {
2675       nz   = procsnz[i];
2676       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2677       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2678     }
2679     /* read in the stuff for the last proc */
2680     if (size != 1) {
2681       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2682       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2683       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2684       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2685     }
2686     ierr = PetscFree(cols);CHKERRQ(ierr);
2687   } else {
2688     /* determine buffer space needed for message */
2689     nz = 0;
2690     for (i=0; i<m; i++) {
2691       nz += locrowlens[i];
2692     }
2693     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2694     mycols = ibuf;
2695     /* receive message of column indices*/
2696     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2697     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2698     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2699   }
2700 
2701   /* loop over local rows, determining number of off diagonal entries */
2702   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2703   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2704   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2705   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2706   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2707   rowcount = 0; nzcount = 0;
2708   for (i=0; i<mbs; i++) {
2709     dcount  = 0;
2710     odcount = 0;
2711     for (j=0; j<bs; j++) {
2712       kmax = locrowlens[rowcount];
2713       for (k=0; k<kmax; k++) {
2714         tmp = mycols[nzcount++]/bs;
2715         if (!mask[tmp]) {
2716           mask[tmp] = 1;
2717           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2718           else masked1[dcount++] = tmp;
2719         }
2720       }
2721       rowcount++;
2722     }
2723 
2724     dlens[i]  = dcount;
2725     odlens[i] = odcount;
2726 
2727     /* zero out the mask elements we set */
2728     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2729     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2730   }
2731 
2732   /* create our matrix */
2733   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2734   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2735   ierr = MatSetType(A,type);CHKERRQ(ierr);
2736   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2737 
2738   if (!rank) {
2739     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2740     /* read in my part of the matrix numerical values  */
2741     nz = procsnz[0];
2742     vals = buf;
2743     mycols = ibuf;
2744     if (size == 1)  nz -= extra_rows;
2745     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2746     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2747 
2748     /* insert into matrix */
2749     jj      = rstart*bs;
2750     for (i=0; i<m; i++) {
2751       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2752       mycols += locrowlens[i];
2753       vals   += locrowlens[i];
2754       jj++;
2755     }
2756     /* read in other processors (except the last one) and ship out */
2757     for (i=1; i<size-1; i++) {
2758       nz   = procsnz[i];
2759       vals = buf;
2760       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2761       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2762     }
2763     /* the last proc */
2764     if (size != 1){
2765       nz   = procsnz[size-1] - extra_rows;  /* i == size-1 here; the extra rows are not on the disk */
2766       vals = buf;
2767       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2768       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2769       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2770     }
2771     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2772   } else {
2773     /* receive numeric values */
2774     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2775 
2776     /* receive message of values*/
2777     vals   = buf;
2778     mycols = ibuf;
2779     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2780     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2781     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"File does not contain the expected number of nonzero values");
2782 
2783     /* insert into matrix */
2784     jj      = rstart*bs;
2785     for (i=0; i<m; i++) {
2786       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2787       mycols += locrowlens[i];
2788       vals   += locrowlens[i];
2789       jj++;
2790     }
2791   }
2792   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2793   ierr = PetscFree(buf);CHKERRQ(ierr);
2794   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2795   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2796   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2797   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2798   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2799   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2800 
2801   *newmat = A;
2802   PetscFunctionReturn(0);
2803 }
2804 
2805 #undef __FUNCT__
2806 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2807 /*@
2808    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2809 
2810    Collective on Mat
2811 
2812    Input Parameters:
2813 +  mat  - the matrix
2814 -  fact - factor used to scale the hash table size; larger values reduce collisions at the cost of memory
2815 
2816    Level: advanced
2817 
2818    Notes:
2819    This can also be set by the command line option: -mat_use_hash_table <fact>
2820 
2821 .keywords: matrix, hashtable, factor, HT
2822 
2823 .seealso: MatSetOption()
2824 @*/
2825 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2826 {
2827   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2828 
2829   PetscFunctionBegin;
2830   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2831   if (f) {
2832     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2833   }
2834   PetscFunctionReturn(0);
2835 }
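
/*
   A minimal usage sketch (not from this file; the factor 1.6 is illustrative
   only, and comm, m, n, M, N, ierr are assumed to be declared by the caller).
   Note that the routine silently does nothing for matrix types that do not
   compose "MatSetHashTableFactor_C":

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/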
2836 
2837 EXTERN_C_BEGIN
2838 #undef __FUNCT__
2839 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2840 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2841 {
2842   Mat_MPIBAIJ *baij;
2843 
2844   PetscFunctionBegin;
2845   baij = (Mat_MPIBAIJ*)mat->data;
2846   baij->ht_fact = fact;
2847   PetscFunctionReturn(0);
2848 }
2849 EXTERN_C_END
2850 
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2853 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2854 {
2855   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2856   PetscFunctionBegin;
2857   *Ad     = a->A;
2858   *Ao     = a->B;
2859   *colmap = a->garray;
2860   PetscFunctionReturn(0);
2861 }
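
/*
   A minimal usage sketch (names assumed declared by the caller): fetch the
   local diagonal block Ad, the local off-diagonal block Ao, and the map from
   Ao's block columns to global block columns.  The returned matrices are
   references to A's internal data, not copies, so the caller must not destroy
   them:

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/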
2862 
2863 /*
2864     Special version for direct calls from Fortran (to eliminate the overhead of two extra function calls)
2865 */
2866 #if defined(PETSC_HAVE_FORTRAN_CAPS)
2867 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
2868 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
2869 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
2870 #endif
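
/* the cases above cover the common Fortran name-mangling schemes: all upper
   case, and no trailing underscore; the default scheme (a single trailing
   underscore) needs no remapping and is left unchanged */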
2871 
2872 #undef __FUNCT__
2873 #define __FUNCT__ "matmpibaijsetvaluesblocked"
2874 /*@C
2875   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
2876 
2877   Collective on Mat
2878 
2879   Input Parameters:
2880 + mat - the matrix
2881 . min - number of input rows
2882 . im - input rows
2883 . nin - number of input columns
2884 . in - input columns
2885 . v - numerical values input
2886 - addvin - INSERT_VALUES or ADD_VALUES
2887 
2888   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(); the duplication is regrettable, but it eliminates the dispatch overhead for direct Fortran callers.
2889 
2890   Level: advanced
2891 
2892 .seealso:   MatSetValuesBlocked()
2893 @*/
2894 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
2895 {
2896   /* convert input arguments to C version */
2897   Mat             mat = *matin;
2898   PetscInt        m = *min, n = *nin;
2899   InsertMode      addv = *addvin;
2900 
2901   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
2902   const MatScalar *value;
2903   MatScalar       *barray=baij->barray;
2904   PetscTruth      roworiented = baij->roworiented;
2905   PetscErrorCode  ierr;
2906   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
2907   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
2908   PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
2909 
2910   PetscFunctionBegin;
2911   /* tasks normally handled by MatSetValuesBlocked() */
2912   if (mat->insertmode == NOT_SET_VALUES) {
2913     mat->insertmode = addv;
2914   }
2915 #if defined(PETSC_USE_DEBUG)
2916   else if (mat->insertmode != addv) {
2917     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
2918   }
2919   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2920 #endif
2921   if (mat->assembled) {
2922     mat->was_assembled = PETSC_TRUE;
2923     mat->assembled     = PETSC_FALSE;
2924   }
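  /* inserting into a previously assembled matrix reopens it; was_assembled later
     triggers the colmap-based translation of off-process block columns */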
2925   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
2926 
2928   if (!barray) {
2929     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
2930     baij->barray = barray;
2931   }
2932 
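  /* stepval is the gap (in scalars) between the end of one row of a bs x bs
     block and the start of that block's next row within the input array v
     (between columns, for column-oriented input) */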
2933   if (roworiented) {
2934     stepval = (n-1)*bs;
2935   } else {
2936     stepval = (m-1)*bs;
2937   }
2938   for (i=0; i<m; i++) {
2939     if (im[i] < 0) continue;
2940 #if defined(PETSC_USE_DEBUG)
2941     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
2942 #endif
2943     if (im[i] >= rstart && im[i] < rend) {
2944       row = im[i] - rstart;
2945       for (j=0; j<n; j++) {
2946         /* if there is a single block column (row-oriented input) or a single block row (column-oriented input), the bs2 values are already contiguous in v and no copy is required */
2947         if ((roworiented) && (n == 1)) {
2948           barray = (MatScalar*)v + i*bs2;
2949         } else if((!roworiented) && (m == 1)) {
2950           barray = (MatScalar*)v + j*bs2;
2951         } else { /* Here a copy is required */
2952           if (roworiented) {
2953             value = v + i*(stepval+bs)*bs + j*bs;
2954           } else {
2955             value = v + j*(stepval+bs)*bs + i*bs;
2956           }
2957           for (ii=0; ii<bs; ii++,value+=stepval) {
2958             for (jj=0; jj<bs; jj++) {
2959               *barray++  = *value++;
2960             }
2961           }
2962           barray -= bs2;   /* rewind to the start of the packed block */
2963         }
2964 
2965         if (in[j] >= cstart && in[j] < cend){
2966           col  = in[j] - cstart;
2967           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
2968         }
2969         else if (in[j] < 0) continue;
2970 #if defined(PETSC_USE_DEBUG)
2971         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
2972 #endif
2973         else {
2974           if (mat->was_assembled) {
2975             if (!baij->colmap) {
2976               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2977             }
2978 
2979 #if defined(PETSC_USE_DEBUG)
2980 #if defined (PETSC_USE_CTABLE)
2981             { PetscInt data;
2982               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
2983               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
2984             }
2985 #else
2986             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
2987 #endif
2988 #endif
2989 #if defined (PETSC_USE_CTABLE)
2990             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
2991             col  = (col - 1)/bs;
2992 #else
2993             col = (baij->colmap[in[j]] - 1)/bs;
2994 #endif
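            /* col < 0 signals that the global block column was not found in the
               colmap; unless new nonzeros are disallowed, disassemble the matrix
               so the entry can be inserted into the rebuilt off-diagonal part */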
2995             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
2996               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
2997               col =  in[j];
2998             }
2999           }
3000           else col = in[j];
3001           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3002         }
3003       }
3004     } else {
3005       if (!baij->donotstash) {
3006         if (roworiented) {
3007           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3008         } else {
3009           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3010         }
3011       }
3012     }
3013   }
3014 
3015   /* task normally handled by MatSetValuesBlocked() */
3016   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3017   PetscFunctionReturn(0);
3018 }
3019