xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 6abefa4f446b2ea91915da4e25ea0defa83db48e)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "petscblaslapack.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
20 {
21   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
22   PetscErrorCode ierr;
23   PetscInt       i,*idxb = 0;
24   PetscScalar    *va,*vb;
25   Vec            vtmp;
26 
27   PetscFunctionBegin;
28   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
29   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
30   if (idx) {
31     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++){
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
41   }
42 
43   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
44   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
45   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
46   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
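
/*
   Illustrative sketch (not part of the library): how a caller might reach the
   routine above through MatGetRowMaxAbs() for a MATMPIBAIJ matrix. The vector
   layout must match the row layout of A; comm stands for the communicator of A.

     PetscInt m,n,*loc;
     Vec      rowmax;
     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = VecCreateMPI(comm,m,PETSC_DECIDE,&rowmax);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
     ...   loc[i] now holds the global column of the largest entry in local row i
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(rowmax);CHKERRQ(ierr);
*/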
49 
50 EXTERN_C_BEGIN
51 #undef __FUNCT__
52 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
56   PetscErrorCode ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
60   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
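
/*
   Illustrative sketch (not part of the library): the usage pattern served by
   the two routines above, which delegate to the sequential diagonal block A
   and off-diagonal block B. The nonzero pattern is assumed frozen first, as
   the sequential MatStoreValues() implementations require.

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);      ...  save the numerical values
     ierr = MatZeroEntries(mat);CHKERRQ(ierr);      ...  modify the matrix entries
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   ...  restore the saved values
*/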
79 
80 /*
81      Local utility routine that creates a mapping from the global column
82    number to the local number in the off-diagonal part of the local
83    storage of the matrix.  This is done in a non-scalable way since the
84    length of colmap equals the number of block columns in the global matrix.
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89 {
90   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
91   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
92   PetscErrorCode ierr;
93   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
94 
95   PetscFunctionBegin;
96 #if defined (PETSC_USE_CTABLE)
97   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
98   for (i=0; i<nbs; i++){
99     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
100   }
101 #else
102   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
103   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
105   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106 #endif
107   PetscFunctionReturn(0);
108 }
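
/*
   Worked example of the colmap just built (illustrative numbers): with bs = 2
   and garray = {3,7,9}, the array branch sets colmap[3] = 1, colmap[7] = 3,
   colmap[9] = 5 (i*bs+1 for i = 0,1,2) and leaves every other entry 0, so
   colmap[c]-1 < 0 flags a block column absent from B while colmap[c]-1 is the
   first local point column of block column c. The PetscTable branch stores the
   same shifted values, with a lookup result of 0 meaning "not found".
*/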
109 
110 #define CHUNKSIZE  10
111 
112 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
113 { \
114  \
115     brow = row/bs;  \
116     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
117     rmax = aimax[brow]; nrow = ailen[brow]; \
118       bcol = col/bs; \
119       ridx = row % bs; cidx = col % bs; \
120       low = 0; high = nrow; \
121       while (high-low > 3) { \
122         t = (low+high)/2; \
123         if (rp[t] > bcol) high = t; \
124         else              low  = t; \
125       } \
126       for (_i=low; _i<high; _i++) { \
127         if (rp[_i] > bcol) break; \
128         if (rp[_i] == bcol) { \
129           bap  = ap +  bs2*_i + bs*cidx + ridx; \
130           if (addv == ADD_VALUES) *bap += value;  \
131           else                    *bap  = value;  \
132           goto a_noinsert; \
133         } \
134       } \
135       if (a->nonew == 1) goto a_noinsert; \
136       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
137       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
138       N = nrow++ - 1;  \
139       /* shift up all the later entries in this row */ \
140       for (ii=N; ii>=_i; ii--) { \
141         rp[ii+1] = rp[ii]; \
142         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
143       } \
144       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
145       rp[_i]                      = bcol;  \
146       ap[bs2*_i + bs*cidx + ridx] = value;  \
147       a_noinsert:; \
148     ailen[brow] = nrow; \
149 }
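
/*
   Note on the macro above (and its twin for B below): the lookup is a bisection
   that narrows [low,high) to at most 3 candidate block columns followed by a
   linear scan. For example, with nrow = 8 sorted block columns and bcol the one
   at position 5, the while loop runs twice (low 0->4, high 8->6) before the for
   loop finds the match at _i = 5. On a miss, the new block is inserted in sorted
   position by shifting the later rp[] indices and bs2-sized value blocks up.
*/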
150 
151 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
152 { \
153     brow = row/bs;  \
154     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
155     rmax = bimax[brow]; nrow = bilen[brow]; \
156       bcol = col/bs; \
157       ridx = row % bs; cidx = col % bs; \
158       low = 0; high = nrow; \
159       while (high-low > 3) { \
160         t = (low+high)/2; \
161         if (rp[t] > bcol) high = t; \
162         else              low  = t; \
163       } \
164       for (_i=low; _i<high; _i++) { \
165         if (rp[_i] > bcol) break; \
166         if (rp[_i] == bcol) { \
167           bap  = ap +  bs2*_i + bs*cidx + ridx; \
168           if (addv == ADD_VALUES) *bap += value;  \
169           else                    *bap  = value;  \
170           goto b_noinsert; \
171         } \
172       } \
173       if (b->nonew == 1) goto b_noinsert; \
174       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
175       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
176       CHKMEMQ;\
177       N = nrow++ - 1;  \
178       /* shift up all the later entries in this row */ \
179       for (ii=N; ii>=_i; ii--) { \
180         rp[ii+1] = rp[ii]; \
181         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
182       } \
183       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
184       rp[_i]                      = bcol;  \
185       ap[bs2*_i + bs*cidx + ridx] = value;  \
186       b_noinsert:; \
187     bilen[brow] = nrow; \
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatSetValues_MPIBAIJ"
192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
193 {
194   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
195   MatScalar      value;
196   PetscTruth     roworiented = baij->roworiented;
197   PetscErrorCode ierr;
198   PetscInt       i,j,row,col;
199   PetscInt       rstart_orig=mat->rmap->rstart;
200   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
201   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
202 
203   /* Some Variables required in the macro */
204   Mat            A = baij->A;
205   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
206   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
207   MatScalar      *aa=a->a;
208 
209   Mat            B = baij->B;
210   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
211   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
212   MatScalar      *ba=b->a;
213 
214   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
215   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
216   MatScalar      *ap,*bap;
217 
218   PetscFunctionBegin;
219   if (v) PetscValidScalarPointer(v,6);
220   for (i=0; i<m; i++) {
221     if (im[i] < 0) continue;
222 #if defined(PETSC_USE_DEBUG)
223     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
224 #endif
225     if (im[i] >= rstart_orig && im[i] < rend_orig) {
226       row = im[i] - rstart_orig;
227       for (j=0; j<n; j++) {
228         if (in[j] >= cstart_orig && in[j] < cend_orig){
229           col = in[j] - cstart_orig;
230           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
231           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         } else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_DEBUG)
235         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!baij->colmap) {
240               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
244             col  = col - 1;
245 #else
246             col = baij->colmap[in[j]/bs] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
249               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
252               B = baij->B;
253               b = (Mat_SeqBAIJ*)(B)->data;
254               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
255               ba=b->a;
256             } else col += in[j]%bs;
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!baij->donotstash) {
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
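
/*
   Illustrative sketch (not part of the library): how the routine above is
   driven from user code. Locally owned (row,col) pairs go directly into the
   diagonal block A or off-diagonal block B; rows owned elsewhere are stashed
   and communicated during assembly.

     PetscInt    row = 0, cols[2] = {0,1};
     PetscScalar vals[2] = {2.0,-1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/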
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray=baij->barray;
283   PetscTruth        roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) {
296     stepval = (n-1)*bs;
297   } else {
298     stepval = (m-1)*bs;
299   }
300   for (i=0; i<m; i++) {
301     if (im[i] < 0) continue;
302 #if defined(PETSC_USE_DEBUG)
303     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
304 #endif
305     if (im[i] >= rstart && im[i] < rend) {
306       row = im[i] - rstart;
307       for (j=0; j<n; j++) {
308         /* If there is only one block (n == 1 row-oriented, or m == 1 column-oriented) a copy is not required */
309         if ((roworiented) && (n == 1)) {
310           barray = (MatScalar*)v + i*bs2;
311         } else if((!roworiented) && (m == 1)) {
312           barray = (MatScalar*)v + j*bs2;
313         } else { /* Here a copy is required */
314           if (roworiented) {
315             value = v + i*(stepval+bs)*bs + j*bs;
316           } else {
317             value = v + j*(stepval+bs)*bs + i*bs;
318           }
319           for (ii=0; ii<bs; ii++,value+=stepval) {
320             for (jj=0; jj<bs; jj++) {
321               *barray++  = *value++;
322             }
323           }
324           barray -=bs2;
325         }
326 
327         if (in[j] >= cstart && in[j] < cend){
328           col  = in[j] - cstart;
329           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
330         }
331         else if (in[j] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
334 #endif
335         else {
336           if (mat->was_assembled) {
337             if (!baij->colmap) {
338               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
339             }
340 
341 #if defined(PETSC_USE_DEBUG)
342 #if defined (PETSC_USE_CTABLE)
343             { PetscInt data;
344               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
345               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
346             }
347 #else
348             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
349 #endif
350 #endif
351 #if defined (PETSC_USE_CTABLE)
352             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
353             col  = (col - 1)/bs;
354 #else
355             col = (baij->colmap[in[j]] - 1)/bs;
356 #endif
357             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
358               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
359               col =  in[j];
360             }
361           }
362           else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (!baij->donotstash) {
368         if (roworiented) {
369           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         } else {
371           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
372         }
373       }
374     }
375   }
376   PetscFunctionReturn(0);
377 }
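
/*
   Illustrative sketch (not part of the library): the blocked interface above
   takes block-row/block-column indices plus a dense bs-by-bs block of values;
   with bs = 2, block (ib,jb) covers point rows 2*ib,2*ib+1 and point columns
   2*jb,2*jb+1, and with the default row-oriented setting the block is supplied
   in row-major order.

     PetscInt    ib = 0, jb = 1;
     PetscScalar blk[4] = {1.0,2.0,
                           3.0,4.0};
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,blk,INSERT_VALUES);CHKERRQ(ierr);
*/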
378 
379 #define HASH_KEY 0.6180339887
380 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
381 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
382 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
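/*
   Worked example of the multiplicative hash above: with size = 100 and key = 7,
   tmp = 7*0.6180339887 = 4.3262..., the fractional part is 0.3262..., and
   HASH() yields (PetscInt)(100*0.3262...) = 32. Collisions are resolved by the
   linear probing loops in the two routines below.
*/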
383 #undef __FUNCT__
384 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
385 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
386 {
387   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
388   PetscTruth     roworiented = baij->roworiented;
389   PetscErrorCode ierr;
390   PetscInt       i,j,row,col;
391   PetscInt       rstart_orig=mat->rmap->rstart;
392   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
393   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
394   PetscReal      tmp;
395   MatScalar      **HD = baij->hd,value;
396 #if defined(PETSC_USE_DEBUG)
397   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
398 #endif
399 
400   PetscFunctionBegin;
401   if (v) PetscValidScalarPointer(v,6);
402   for (i=0; i<m; i++) {
403 #if defined(PETSC_USE_DEBUG)
404     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
405     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
406 #endif
407     row = im[i];
408     if (row >= rstart_orig && row < rend_orig) {
409       for (j=0; j<n; j++) {
410         col = in[j];
411         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412         /* Look up into the Hash Table */
413         key = (row/bs)*Nbs+(col/bs)+1;
414         h1  = HASH(size,key,tmp);
415 
416 
417         idx = h1;
418 #if defined(PETSC_USE_DEBUG)
419         insert_ct++;
420         total_ct++;
421         if (HT[idx] != key) {
422           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
423           if (idx == size) {
424             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
425             if (idx == h1) {
426               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
427             }
428           }
429         }
430 #else
431         if (HT[idx] != key) {
432           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
433           if (idx == size) {
434             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
435             if (idx == h1) {
436               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
437             }
438           }
439         }
440 #endif
441         /* A HASH table entry is found, so insert the values at the correct address */
442         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
443         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
444       }
445     } else {
446       if (!baij->donotstash) {
447         if (roworiented) {
448           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
449         } else {
450           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
451         }
452       }
453     }
454   }
455 #if defined(PETSC_USE_DEBUG)
456   baij->ht_total_ct = total_ct;
457   baij->ht_insert_ct = insert_ct;
458 #endif
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
465 {
466   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
467   PetscTruth        roworiented = baij->roworiented;
468   PetscErrorCode    ierr;
469   PetscInt          i,j,ii,jj,row,col;
470   PetscInt          rstart=baij->rstartbs;
471   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
472   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
473   PetscReal         tmp;
474   MatScalar         **HD = baij->hd,*baij_a;
475   const PetscScalar *v_t,*value;
476 #if defined(PETSC_USE_DEBUG)
477   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
478 #endif
479 
480   PetscFunctionBegin;
481 
482   if (roworiented) {
483     stepval = (n-1)*bs;
484   } else {
485     stepval = (m-1)*bs;
486   }
487   for (i=0; i<m; i++) {
488 #if defined(PETSC_USE_DEBUG)
489     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
490     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
491 #endif
492     row   = im[i];
493     v_t   = v + i*nbs2;
494     if (row >= rstart && row < rend) {
495       for (j=0; j<n; j++) {
496         col = in[j];
497 
498         /* Look up into the Hash Table */
499         key = row*Nbs+col+1;
500         h1  = HASH(size,key,tmp);
501 
502         idx = h1;
503 #if defined(PETSC_USE_DEBUG)
504         total_ct++;
505         insert_ct++;
506         if (HT[idx] != key) {
507           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
508           if (idx == size) {
509             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
510             if (idx == h1) {
511               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
512             }
513           }
514         }
515 #else
516         if (HT[idx] != key) {
517           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
518           if (idx == size) {
519             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
520             if (idx == h1) {
521               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
522             }
523           }
524         }
525 #endif
526         baij_a = HD[idx];
527         if (roworiented) {
528           /*value = v + i*(stepval+bs)*bs + j*bs;*/
529           /* value = v + (i*(stepval+bs)+j)*bs; */
530           value = v_t;
531           v_t  += bs;
532           if (addv == ADD_VALUES) {
533             for (ii=0; ii<bs; ii++,value+=stepval) {
534               for (jj=ii; jj<bs2; jj+=bs) {
535                 baij_a[jj]  += *value++;
536               }
537             }
538           } else {
539             for (ii=0; ii<bs; ii++,value+=stepval) {
540               for (jj=ii; jj<bs2; jj+=bs) {
541                 baij_a[jj]  = *value++;
542               }
543             }
544           }
545         } else {
546           value = v + j*(stepval+bs)*bs + i*bs;
547           if (addv == ADD_VALUES) {
548             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
549               for (jj=0; jj<bs; jj++) {
550                 baij_a[jj]  += *value++;
551               }
552             }
553           } else {
554             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
555               for (jj=0; jj<bs; jj++) {
556                 baij_a[jj]  = *value++;
557               }
558             }
559           }
560         }
561       }
562     } else {
563       if (!baij->donotstash) {
564         if (roworiented) {
565           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         } else {
567           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
568         }
569       }
570     }
571   }
572 #if defined(PETSC_USE_DEBUG)
573   baij->ht_total_ct = total_ct;
574   baij->ht_insert_ct = insert_ct;
575 #endif
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatGetValues_MPIBAIJ"
581 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
582 {
583   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
584   PetscErrorCode ierr;
585   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
586   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
587 
588   PetscFunctionBegin;
589   for (i=0; i<m; i++) {
590     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
591     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
592     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
593       row = idxm[i] - bsrstart;
594       for (j=0; j<n; j++) {
595         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
596         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
597         if (idxn[j] >= bscstart && idxn[j] < bscend){
598           col = idxn[j] - bscstart;
599           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
600         } else {
601           if (!baij->colmap) {
602             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
603           }
604 #if defined (PETSC_USE_CTABLE)
605           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
606           data --;
607 #else
608           data = baij->colmap[idxn[j]/bs]-1;
609 #endif
610           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
611           else {
612             col  = data + idxn[j]%bs;
613             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
614           }
615         }
616       }
617     } else {
618       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
619     }
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatNorm_MPIBAIJ"
626 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
627 {
628   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
629   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
630   PetscErrorCode ierr;
631   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
632   PetscReal      sum = 0.0;
633   MatScalar      *v;
634 
635   PetscFunctionBegin;
636   if (baij->size == 1) {
637     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
638   } else {
639     if (type == NORM_FROBENIUS) {
640       v = amat->a;
641       nz = amat->nz*bs2;
642       for (i=0; i<nz; i++) {
643 #if defined(PETSC_USE_COMPLEX)
644         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
645 #else
646         sum += (*v)*(*v); v++;
647 #endif
648       }
649       v = bmat->a;
650       nz = bmat->nz*bs2;
651       for (i=0; i<nz; i++) {
652 #if defined(PETSC_USE_COMPLEX)
653         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
654 #else
655         sum += (*v)*(*v); v++;
656 #endif
657       }
658       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
659       *nrm = sqrt(*nrm);
660     } else if (type == NORM_1) { /* max column sum */
661       PetscReal *tmp,*tmp2;
662       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
663       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
664       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
665       v = amat->a; jj = amat->j;
666       for (i=0; i<amat->nz; i++) {
667         for (j=0; j<bs; j++){
668           col = bs*(cstart + *jj) + j; /* column index */
669           for (row=0; row<bs; row++){
670             tmp[col] += PetscAbsScalar(*v);  v++;
671           }
672         }
673         jj++;
674       }
675       v = bmat->a; jj = bmat->j;
676       for (i=0; i<bmat->nz; i++) {
677         for (j=0; j<bs; j++){
678           col = bs*garray[*jj] + j;
679           for (row=0; row<bs; row++){
680             tmp[col] += PetscAbsScalar(*v); v++;
681           }
682         }
683         jj++;
684       }
685       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
686       *nrm = 0.0;
687       for (j=0; j<mat->cmap->N; j++) {
688         if (tmp2[j] > *nrm) *nrm = tmp2[j];
689       }
690       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
691     } else if (type == NORM_INFINITY) { /* max row sum */
692       PetscReal *sums;
693       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
694       sum = 0.0;
695       for (j=0; j<amat->mbs; j++) {
696         for (row=0; row<bs; row++) sums[row] = 0.0;
697         v = amat->a + bs2*amat->i[j];
698         nz = amat->i[j+1]-amat->i[j];
699         for (i=0; i<nz; i++) {
700           for (col=0; col<bs; col++){
701             for (row=0; row<bs; row++){
702               sums[row] += PetscAbsScalar(*v); v++;
703             }
704           }
705         }
706         v = bmat->a + bs2*bmat->i[j];
707         nz = bmat->i[j+1]-bmat->i[j];
708         for (i=0; i<nz; i++) {
709           for (col=0; col<bs; col++){
710             for (row=0; row<bs; row++){
711               sums[row] += PetscAbsScalar(*v); v++;
712             }
713           }
714         }
715         for (row=0; row<bs; row++){
716           if (sums[row] > sum) sum = sums[row];
717         }
718       }
719       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
720       ierr = PetscFree(sums);CHKERRQ(ierr);
721     } else {
722       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
723     }
724   }
725   PetscFunctionReturn(0);
726 }
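
/*
   For reference, the three cases handled above compute (with M the global matrix):
     Frobenius:  ||M||_F   = sqrt(sum over all entries |m_ij|^2), partial sums
                 reduced with MPI_SUM and the square root taken afterwards;
     1-norm:     ||M||_1   = max_j sum_i |m_ij|, per-column sums reduced with
                 MPI_SUM before taking the maximum;
     inf-norm:   ||M||_inf = max_i sum_j |m_ij|, local row-sum maxima reduced
                 with MPI_MAX.
   The block storage only changes the traversal: entries are visited bs*bs at a time.
*/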
727 
728 /*
729   Creates and fills the hash table.
730   This table is created only once.
731   If new entries need to be added to the matrix
732   then the hash table has to be destroyed and
733   recreated.
734 */
735 #undef __FUNCT__
736 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
737 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
738 {
739   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
740   Mat            A = baij->A,B=baij->B;
741   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
742   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
743   PetscErrorCode ierr;
744   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
745   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
746   PetscInt       *HT,key;
747   MatScalar      **HD;
748   PetscReal      tmp;
749 #if defined(PETSC_USE_INFO)
750   PetscInt       ct=0,max=0;
751 #endif
752 
753   PetscFunctionBegin;
754   if (baij->ht) PetscFunctionReturn(0);
755 
756   baij->ht_size = (PetscInt)(factor*nz);
757   ht_size       = baij->ht_size;
758 
759   /* Allocate Memory for Hash Table */
760   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
761   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
762   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
763   HD   = baij->hd;
764   HT   = baij->ht;
765 
766   /* Loop Over A */
767   for (i=0; i<a->mbs; i++) {
768     for (j=ai[i]; j<ai[i+1]; j++) {
769       row = i+rstart;
770       col = aj[j]+cstart;
771 
772       key = row*Nbs + col + 1;
773       h1  = HASH(ht_size,key,tmp);
774       for (k=0; k<ht_size; k++){
775         if (!HT[(h1+k)%ht_size]) {
776           HT[(h1+k)%ht_size] = key;
777           HD[(h1+k)%ht_size] = a->a + j*bs2;
778           break;
779 #if defined(PETSC_USE_INFO)
780         } else {
781           ct++;
782 #endif
783         }
784       }
785 #if defined(PETSC_USE_INFO)
786       if (k> max) max = k;
787 #endif
788     }
789   }
790   /* Loop Over B */
791   for (i=0; i<b->mbs; i++) {
792     for (j=bi[i]; j<bi[i+1]; j++) {
793       row = i+rstart;
794       col = garray[bj[j]];
795       key = row*Nbs + col + 1;
796       h1  = HASH(ht_size,key,tmp);
797       for (k=0; k<ht_size; k++){
798         if (!HT[(h1+k)%ht_size]) {
799           HT[(h1+k)%ht_size] = key;
800           HD[(h1+k)%ht_size] = b->a + j*bs2;
801           break;
802 #if defined(PETSC_USE_INFO)
803         } else {
804           ct++;
805 #endif
806         }
807       }
808 #if defined(PETSC_USE_INFO)
809       if (k> max) max = k;
810 #endif
811     }
812   }
813 
814   /* Print Summary */
815 #if defined(PETSC_USE_INFO)
816   for (i=0,j=0; i<ht_size; i++) {
817     if (HT[i]) {j++;}
818   }
819   ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
820 #endif
821   PetscFunctionReturn(0);
822 }
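
/*
   Illustrative sketch (not part of the library): the hash-table insertion path
   is requested per matrix from user code. After the first final assembly,
   MatAssemblyEnd_MPIBAIJ() below builds the table and switches the setvalues
   function pointers to the _HT variants above, so later insertions into the
   existing nonzero pattern avoid the per-row searches.

     ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ...   assemble once; subsequent MatSetValues() calls use the hash table
*/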
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
826 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
827 {
828   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
829   PetscErrorCode ierr;
830   PetscInt       nstash,reallocs;
831   InsertMode     addv;
832 
833   PetscFunctionBegin;
834   if (baij->donotstash) {
835     PetscFunctionReturn(0);
836   }
837 
838   /* make sure all processors are either in INSERTMODE or ADDMODE */
839   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
840   if (addv == (ADD_VALUES|INSERT_VALUES)) {
841     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
842   }
843   mat->insertmode = addv; /* in case this processor had no cache */
844 
845   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
846   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
847   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
848   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
849   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
850   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
851   PetscFunctionReturn(0);
852 }
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
856 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
857 {
858   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
859   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
860   PetscErrorCode ierr;
861   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
862   PetscInt       *row,*col;
863   PetscTruth     r1,r2,r3,other_disassembled;
864   MatScalar      *val;
865   InsertMode     addv = mat->insertmode;
866   PetscMPIInt    n;
867 
868   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
869   PetscFunctionBegin;
870   if (!baij->donotstash) {
871     while (1) {
872       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
873       if (!flg) break;
874 
875       for (i=0; i<n;) {
876         /* Now identify the consecutive vals belonging to the same row */
877         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
878         if (j < n) ncols = j-i;
879         else       ncols = n-i;
880         /* Now assemble all these values with a single function call */
881         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
882         i = j;
883       }
884     }
885     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
886     /* Now process the block-stash. Since the values are stashed column-oriented,
887        clear the roworiented flags, and restore the original flags after the
888        MatSetValuesBlocked() calls */
889     r1 = baij->roworiented;
890     r2 = a->roworiented;
891     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
892     baij->roworiented = PETSC_FALSE;
893     a->roworiented    = PETSC_FALSE;
894     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
895     while (1) {
896       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
897       if (!flg) break;
898 
899       for (i=0; i<n;) {
900         /* Now identify the consecutive vals belonging to the same row */
901         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
902         if (j < n) ncols = j-i;
903         else       ncols = n-i;
904         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
905         i = j;
906       }
907     }
908     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
909     baij->roworiented = r1;
910     a->roworiented    = r2;
911     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
912   }
913 
914   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
915   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
916 
917   /* determine if any processor has disassembled, if so we must
918      also disassemble ourselves, in order that we may reassemble. */
919   /*
920      if nonzero structure of submatrix B cannot change then we know that
921      no processor disassembled thus we can skip this stuff
922   */
923   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
924     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
925     if (mat->was_assembled && !other_disassembled) {
926       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
927     }
928   }
929 
930   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
931     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
932   }
933   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
934   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
935   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
936 
937 #if defined(PETSC_USE_INFO)
938   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
939     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
940     baij->ht_total_ct  = 0;
941     baij->ht_insert_ct = 0;
942   }
943 #endif
944   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
945     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
946     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
947     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
948   }
949 
950   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
951   baij->rowvalues = 0;
952   PetscFunctionReturn(0);
953 }
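
/*
   Illustrative sketch (not part of the library): the standard assembly sequence
   that drives the two routines above. Between Begin and End the stashed
   off-process values are in flight, so unrelated computation can be overlapped.

     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ...   work not involving mat ...
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/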
954 
955 #undef __FUNCT__
956 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
957 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
958 {
959   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
960   PetscErrorCode    ierr;
961   PetscMPIInt       size = baij->size,rank = baij->rank;
962   PetscInt          bs = mat->rmap->bs;
963   PetscTruth        iascii,isdraw;
964   PetscViewer       sviewer;
965   PetscViewerFormat format;
966 
967   PetscFunctionBegin;
968   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
969   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
970   if (iascii) {
971     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
972     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
973       MatInfo info;
974       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
975       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
977               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
978               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
979       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
980       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
981       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
982       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
983       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
984       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
985       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
986       PetscFunctionReturn(0);
987     } else if (format == PETSC_VIEWER_ASCII_INFO) {
988       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
989       PetscFunctionReturn(0);
990     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
991       PetscFunctionReturn(0);
992     }
993   }
994 
995   if (isdraw) {
996     PetscDraw       draw;
997     PetscTruth isnull;
998     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
999     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1000   }
1001 
1002   if (size == 1) {
1003     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1004     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1005   } else {
1006     /* assemble the entire matrix onto first processor. */
1007     Mat         A;
1008     Mat_SeqBAIJ *Aloc;
1009     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1010     MatScalar   *a;
1011 
1012     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1013     /* Perhaps this should be the type of mat? */
1014     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1015     if (!rank) {
1016       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1017     } else {
1018       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1019     }
1020     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1021     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1022     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1023 
1024     /* copy over the A part */
1025     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1026     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1027     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1028 
1029     for (i=0; i<mbs; i++) {
1030       rvals[0] = bs*(baij->rstartbs + i);
1031       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1032       for (j=ai[i]; j<ai[i+1]; j++) {
1033         col = (baij->cstartbs+aj[j])*bs;
1034         for (k=0; k<bs; k++) {
1035           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1036           col++; a += bs;
1037         }
1038       }
1039     }
1040     /* copy over the B part */
1041     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1042     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1043     for (i=0; i<mbs; i++) {
1044       rvals[0] = bs*(baij->rstartbs + i);
1045       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1046       for (j=ai[i]; j<ai[i+1]; j++) {
1047         col = baij->garray[aj[j]]*bs;
1048         for (k=0; k<bs; k++) {
1049           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1050           col++; a += bs;
1051         }
1052       }
1053     }
1054     ierr = PetscFree(rvals);CHKERRQ(ierr);
1055     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1056     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1057     /*
1058        Everyone has to call to draw the matrix since the graphics waits are
1059        synchronized across all processors that share the PetscDraw object
1060     */
1061     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1062     if (!rank) {
1063       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1064       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1065     }
1066     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1067     ierr = MatDestroy(A);CHKERRQ(ierr);
1068   }
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatView_MPIBAIJ"
1074 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1075 {
1076   PetscErrorCode ierr;
1077   PetscTruth     iascii,isdraw,issocket,isbinary;
1078 
1079   PetscFunctionBegin;
1080   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1081   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1082   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1083   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1084   if (iascii || isdraw || issocket || isbinary) {
1085     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1086   } else {
1087     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1088   }
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1094 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1095 {
1096   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100 #if defined(PETSC_USE_LOG)
1101   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1102 #endif
1103   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1104   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1105   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1106   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1107 #if defined (PETSC_USE_CTABLE)
1108   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1109 #else
1110   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1111 #endif
1112   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1113   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1114   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1115   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1116   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1117   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1118   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1119   ierr = PetscFree(baij);CHKERRQ(ierr);
1120 
1121   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatMult_MPIBAIJ"
1134 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1135 {
1136   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1137   PetscErrorCode ierr;
1138   PetscInt       nt;
1139 
1140   PetscFunctionBegin;
1141   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1142   if (nt != A->cmap->n) {
1143     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1144   }
1145   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1146   if (nt != A->rmap->n) {
1147     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1148   }
1149   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1150   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1151   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1152   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
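
/*
   Illustrative sketch (not part of the library): callers reach the routine
   above through MatMult(). Note how it overlaps the scatter of the needed
   off-process entries of xx with the local product against a->A before adding
   in a->B applied to the gathered ghost values.

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/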
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1158 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1159 {
1160   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1161   PetscErrorCode ierr;
1162 
1163   PetscFunctionBegin;
1164   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1165   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1166   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1167   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1173 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1174 {
1175   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1176   PetscErrorCode ierr;
1177   PetscTruth     merged;
1178 
1179   PetscFunctionBegin;
1180   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1181   /* do nondiagonal part */
1182   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1183   if (!merged) {
1184     /* send it on its way */
1185     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1186     /* do local part */
1187     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1188     /* receive remote parts: note this assumes the values are not actually */
1189     /* inserted in yy until the next line */
1190     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1191   } else {
1192     /* do local part */
1193     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1194     /* send it on its way */
1195     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1196     /* values actually were received in the Begin() but we need to call this nop */
1197     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1198   }
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1204 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1205 {
1206   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210   /* do nondiagonal part */
1211   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1212   /* send it on its way */
1213   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1214   /* do local part */
1215   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1216   /* receive remote parts: note this assumes the values are not actually */
1217   /* inserted in zz until the next line, which is true for this implementation */
1218   /* but is perhaps not always true. */
1219   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 /*
1224   This only works correctly for square matrices where the subblock A->A is the
1225    diagonal block
1226 */
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1229 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1230 {
1231   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1236   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatScale_MPIBAIJ"
1242 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1243 {
1244   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1245   PetscErrorCode ierr;
1246 
1247   PetscFunctionBegin;
1248   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1249   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1255 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1256 {
1257   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1258   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1259   PetscErrorCode ierr;
1260   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1261   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1262   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1263 
1264   PetscFunctionBegin;
1265   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1266   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1267   mat->getrowactive = PETSC_TRUE;
1268 
1269   if (!mat->rowvalues && (idx || v)) {
1270     /*
1271         allocate enough space to hold information from the longest row.
1272     */
1273     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1274     PetscInt     max = 1,mbs = mat->mbs,tmp;
1275     for (i=0; i<mbs; i++) {
1276       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1277       if (max < tmp) { max = tmp; }
1278     }
1279     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1280   }
1281   lrow = row - brstart;
1282 
1283   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1284   if (!v)   {pvA = 0; pvB = 0;}
1285   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1286   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1287   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1288   nztot = nzA + nzB;
1289 
1290   cmap  = mat->garray;
1291   if (v  || idx) {
1292     if (nztot) {
1293       /* Sort by increasing column numbers, assuming A and B already sorted */
1294       PetscInt imark = -1;
1295       if (v) {
1296         *v = v_p = mat->rowvalues;
1297         for (i=0; i<nzB; i++) {
1298           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1299           else break;
1300         }
1301         imark = i;
1302         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1303         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1304       }
1305       if (idx) {
1306         *idx = idx_p = mat->rowindices;
1307         if (imark > -1) {
1308           for (i=0; i<imark; i++) {
1309             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1310           }
1311         } else {
1312           for (i=0; i<nzB; i++) {
1313             if (cmap[cworkB[i]/bs] < cstart)
1314               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1315             else break;
1316           }
1317           imark = i;
1318         }
1319         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1320         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1321       }
1322     } else {
1323       if (idx) *idx = 0;
1324       if (v)   *v   = 0;
1325     }
1326   }
1327   *nz = nztot;
1328   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1329   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1335 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1336 {
1337   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1338 
1339   PetscFunctionBegin;
1340   if (!baij->getrowactive) {
1341     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1342   }
1343   baij->getrowactive = PETSC_FALSE;
1344   PetscFunctionReturn(0);
1345 }
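
/*
   Illustrative sketch (not part of the library): the get/restore pair above is
   used as follows for a locally owned row; the column indices come back in
   global numbering, sorted, merged from the diagonal and off-diagonal parts.

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ...   use cols[0..ncols-1] and vals[0..ncols-1] ...
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/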
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1349 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1350 {
1351   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1352   PetscErrorCode ierr;
1353 
1354   PetscFunctionBegin;
1355   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1356   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1362 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1363 {
1364   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1365   Mat            A = a->A,B = a->B;
1366   PetscErrorCode ierr;
1367   PetscReal      isend[5],irecv[5];
1368 
1369   PetscFunctionBegin;
1370   info->block_size     = (PetscReal)matin->rmap->bs;
1371   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1372   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1373   isend[3] = info->memory;  isend[4] = info->mallocs;
1374   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1375   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1376   isend[3] += info->memory;  isend[4] += info->mallocs;
1377   if (flag == MAT_LOCAL) {
1378     info->nz_used      = isend[0];
1379     info->nz_allocated = isend[1];
1380     info->nz_unneeded  = isend[2];
1381     info->memory       = isend[3];
1382     info->mallocs      = isend[4];
1383   } else if (flag == MAT_GLOBAL_MAX) {
1384     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1385     info->nz_used      = irecv[0];
1386     info->nz_allocated = irecv[1];
1387     info->nz_unneeded  = irecv[2];
1388     info->memory       = irecv[3];
1389     info->mallocs      = irecv[4];
1390   } else if (flag == MAT_GLOBAL_SUM) {
1391     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1392     info->nz_used      = irecv[0];
1393     info->nz_allocated = irecv[1];
1394     info->nz_unneeded  = irecv[2];
1395     info->memory       = irecv[3];
1396     info->mallocs      = irecv[4];
1397   } else {
1398     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1399   }
1400   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1401   info->fill_ratio_needed = 0;
1402   info->factor_mallocs    = 0;
1403   PetscFunctionReturn(0);
1404 }
1405 
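/*
   Illustrative sketch (not part of the implementation): how a caller might use
   the routine above, through the public interface, to report aggregate storage
   statistics. Assumes an assembled MPIBAIJ matrix A; all names are hypothetical.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nonzeros used %g allocated %g\n",
                        info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
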
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1408 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1409 {
1410   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1411   PetscErrorCode ierr;
1412 
1413   PetscFunctionBegin;
1414   switch (op) {
1415   case MAT_NEW_NONZERO_LOCATIONS:
1416   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1417   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1418   case MAT_KEEP_NONZERO_PATTERN:
1419   case MAT_NEW_NONZERO_LOCATION_ERR:
1420     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1421     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1422     break;
1423   case MAT_ROW_ORIENTED:
1424     a->roworiented = flg;
1425     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1426     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1427     break;
1428   case MAT_NEW_DIAGONALS:
1429     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1430     break;
1431   case MAT_IGNORE_OFF_PROC_ENTRIES:
1432     a->donotstash = flg;
1433     break;
1434   case MAT_USE_HASH_TABLE:
1435     a->ht_flag = flg;
1436     break;
1437   case MAT_SYMMETRIC:
1438   case MAT_STRUCTURALLY_SYMMETRIC:
1439   case MAT_HERMITIAN:
1440   case MAT_SYMMETRY_ETERNAL:
1441     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1442     break;
1443   default:
1444     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1445   }
1446   PetscFunctionReturn(0);
1447 }
1448 
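/*
   Illustrative sketch: typical caller-side use of the options handled above,
   e.g. enabling the hash-table MatSetValues() path and dropping off-process
   entries during assembly. Assumes an MPIBAIJ matrix A.

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/
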
1449 #undef __FUNCT__
1450 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1451 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1452 {
1453   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1454   Mat_SeqBAIJ    *Aloc;
1455   Mat            B;
1456   PetscErrorCode ierr;
1457   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1458   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1459   MatScalar      *a;
1460 
1461   PetscFunctionBegin;
1462   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place transpose");
1463   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1464     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1465     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1466     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1467     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1468   } else {
1469     B = *matout;
1470   }
1471 
1472   /* copy over the A part */
1473   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1474   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1475   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1476 
1477   for (i=0; i<mbs; i++) {
1478     rvals[0] = bs*(baij->rstartbs + i);
1479     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1480     for (j=ai[i]; j<ai[i+1]; j++) {
1481       col = (baij->cstartbs+aj[j])*bs;
1482       for (k=0; k<bs; k++) {
1483         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1484         col++; a += bs;
1485       }
1486     }
1487   }
1488   /* copy over the B part */
1489   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1490   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1491   for (i=0; i<mbs; i++) {
1492     rvals[0] = bs*(baij->rstartbs + i);
1493     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1494     for (j=ai[i]; j<ai[i+1]; j++) {
1495       col = baij->garray[aj[j]]*bs;
1496       for (k=0; k<bs; k++) {
1497         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1498         col++; a += bs;
1499       }
1500     }
1501   }
1502   ierr = PetscFree(rvals);CHKERRQ(ierr);
1503   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1504   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1505 
1506   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1507     *matout = B;
1508   } else {
1509     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1510   }
1511   PetscFunctionReturn(0);
1512 }
1513 
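/*
   Illustrative sketch of the two calling modes handled above: creating a new
   transpose versus replacing A in place (square matrices only). Assumes an
   assembled matrix A.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   creates a new matrix
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      transposes in place
*/
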
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1516 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1517 {
1518   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1519   Mat            a = baij->A,b = baij->B;
1520   PetscErrorCode ierr;
1521   PetscInt       s1,s2,s3;
1522 
1523   PetscFunctionBegin;
1524   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1525   if (rr) {
1526     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1527     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1528     /* Overlap communication with computation. */
1529     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1530   }
1531   if (ll) {
1532     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1533     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1534     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1535   }
1536   /* scale the diagonal block */
1537   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1538 
1539   if (rr) {
1540     /* Do a scatter end and then right scale the off-diagonal block */
1541     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1542     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1543   }
1544 
1545   PetscFunctionReturn(0);
1546 }
1547 
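/*
   Illustrative sketch: the routine above computes diag(ll)*mat*diag(rr); the
   right scaling of the off-diagonal block waits for the scatter that brings in
   the needed off-process entries of rr. Assumes conforming vectors ll and rr.

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);           scale both sides
     ierr = MatDiagonalScale(A,ll,PETSC_NULL);CHKERRQ(ierr);   scale rows only
*/
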
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1550 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1551 {
1552   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1553   PetscErrorCode ierr;
1554   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1555   PetscInt       i,*owners = A->rmap->range;
1556   PetscInt       *nprocs,j,idx,nsends,row;
1557   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1558   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1559   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1560   MPI_Comm       comm = ((PetscObject)A)->comm;
1561   MPI_Request    *send_waits,*recv_waits;
1562   MPI_Status     recv_status,*send_status;
1563 #if defined(PETSC_DEBUG)
1564   PetscTruth     found = PETSC_FALSE;
1565 #endif
1566 
1567   PetscFunctionBegin;
1568   /*  first count number of contributors to each processor */
1569   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1570   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1571   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
1572   j = 0;
1573   for (i=0; i<N; i++) {
1574     if (lastidx > (idx = rows[i])) j = 0;
1575     lastidx = idx;
1576     for (; j<size; j++) {
1577       if (idx >= owners[j] && idx < owners[j+1]) {
1578         nprocs[2*j]++;
1579         nprocs[2*j+1] = 1;
1580         owner[i] = j;
1581 #if defined(PETSC_DEBUG)
1582         found = PETSC_TRUE;
1583 #endif
1584         break;
1585       }
1586     }
1587 #if defined(PETSC_DEBUG)
1588     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1589     found = PETSC_FALSE;
1590 #endif
1591   }
1592   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1593 
1594   /* inform other processors of number of messages and max length*/
1595   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1596 
1597   /* post receives:   */
1598   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1599   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1600   for (i=0; i<nrecvs; i++) {
1601     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1602   }
1603 
1604   /* do sends:
1605      1) starts[i] gives the starting index in svalues of the entries
1606      destined for the ith processor
1607   */
1608   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1609   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1610   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1611   starts[0]  = 0;
1612   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1613   for (i=0; i<N; i++) {
1614     svalues[starts[owner[i]]++] = rows[i];
1615   }
1616 
1617   starts[0] = 0;
1618   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1619   count = 0;
1620   for (i=0; i<size; i++) {
1621     if (nprocs[2*i+1]) {
1622       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1623     }
1624   }
1625   ierr = PetscFree(starts);CHKERRQ(ierr);
1626 
1627   base = owners[rank];
1628 
1629   /*  wait on receives */
1630   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1631   count  = nrecvs;
1632   slen = 0;
1633   while (count) {
1634     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1635     /* unpack receives into our local space */
1636     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1637     source[imdex]  = recv_status.MPI_SOURCE;
1638     lens[imdex]    = n;
1639     slen          += n;
1640     count--;
1641   }
1642   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1643 
1644   /* move the data into the send scatter */
1645   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1646   count = 0;
1647   for (i=0; i<nrecvs; i++) {
1648     values = rvalues + i*nmax;
1649     for (j=0; j<lens[i]; j++) {
1650       lrows[count++] = values[j] - base;
1651     }
1652   }
1653   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1654   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1655   ierr = PetscFree(owner);CHKERRQ(ierr);
1656   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1657 
1658   /* actually zap the local rows */
1659   /*
1660         Zero the required rows. If the "diagonal block" of the matrix
1661      is square and the user wishes to set the diagonal, we use separate
1662      code so that MatSetValues() is not called for each diagonal entry,
1663      which would allocate new memory each time and slow things down with many mallocs.
1664 
1665   */
1666   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1667   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1668   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1669     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1670   } else if (diag != 0.0) {
1671     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1672     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1673       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1674 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1675     }
1676     for (i=0; i<slen; i++) {
1677       row  = lrows[i] + rstart_bs;
1678       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1679     }
1680     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1682   } else {
1683     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1684   }
1685 
1686   ierr = PetscFree(lrows);CHKERRQ(ierr);
1687 
1688   /* wait on sends */
1689   if (nsends) {
1690     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1691     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1692     ierr = PetscFree(send_status);CHKERRQ(ierr);
1693   }
1694   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1695   ierr = PetscFree(svalues);CHKERRQ(ierr);
1696 
1697   PetscFunctionReturn(0);
1698 }
1699 
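/*
   Illustrative sketch: typical use of the routine above (through MatZeroRows())
   for Dirichlet boundary conditions; rows listed by any process are routed to
   their owners by the rendezvous above. Names nbc and bcrows are hypothetical.

     ierr = MatZeroRows(A,nbc,bcrows,1.0);CHKERRQ(ierr);   zero rows, put 1.0 on the diagonal
*/
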
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1702 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1703 {
1704   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1705   PetscErrorCode ierr;
1706 
1707   PetscFunctionBegin;
1708   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatEqual_MPIBAIJ"
1716 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1717 {
1718   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1719   Mat            a,b,c,d;
1720   PetscTruth     flg;
1721   PetscErrorCode ierr;
1722 
1723   PetscFunctionBegin;
1724   a = matA->A; b = matA->B;
1725   c = matB->A; d = matB->B;
1726 
1727   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1728   if (flg) {
1729     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1730   }
1731   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatCopy_MPIBAIJ"
1737 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1738 {
1739   PetscErrorCode ierr;
1740   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1741   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1742 
1743   PetscFunctionBegin;
1744   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1745   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1746     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1747   } else {
1748     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1749     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1756 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1757 {
1758   PetscErrorCode ierr;
1759 
1760   PetscFunctionBegin;
1761   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1767 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1768 {
1769   PetscErrorCode ierr;
1770   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1771   PetscBLASInt   bnz,one=1;
1772   Mat_SeqBAIJ    *x,*y;
1773 
1774   PetscFunctionBegin;
1775   if (str == SAME_NONZERO_PATTERN) {
1776     PetscScalar alpha = a;
1777     x = (Mat_SeqBAIJ *)xx->A->data;
1778     y = (Mat_SeqBAIJ *)yy->A->data;
1779     bnz = PetscBLASIntCast(x->nz);
1780     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1781     x = (Mat_SeqBAIJ *)xx->B->data;
1782     y = (Mat_SeqBAIJ *)yy->B->data;
1783     bnz = PetscBLASIntCast(x->nz);
1784     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1785   } else {
1786     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1787   }
1788   PetscFunctionReturn(0);
1789 }
1790 
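/*
   Illustrative sketch: Y <- Y + a*X. Passing SAME_NONZERO_PATTERN when the two
   matrices are known to share a nonzero structure lets the routine above apply
   BLAS axpy directly to the stored blocks instead of falling back on
   MatAXPY_Basic(). Assumes X and Y have identical parallel layouts.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
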
1791 #undef __FUNCT__
1792 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1793 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1794 {
1795   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1796   PetscInt rbs,cbs;
1797   PetscErrorCode ierr;
1798 
1799   PetscFunctionBegin;
1800   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1801   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1802   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1803   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1804   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1805   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1811 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1812 {
1813   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1814   PetscErrorCode ierr;
1815 
1816   PetscFunctionBegin;
1817   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1818   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1819   PetscFunctionReturn(0);
1820 }
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1824 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1825 {
1826   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1827   PetscErrorCode ierr;
1828 
1829   PetscFunctionBegin;
1830   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1831   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1837 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1838 {
1839   PetscErrorCode ierr;
1840   IS             iscol_local;
1841   PetscInt       csize;
1842 
1843   PetscFunctionBegin;
1844   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1845   if (call == MAT_REUSE_MATRIX) {
1846     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1847     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1848   } else {
1849     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1850   }
1851   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1852   if (call == MAT_INITIAL_MATRIX) {
1853     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1854     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1855   }
1856   PetscFunctionReturn(0);
1857 }
1858 
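/*
   Illustrative sketch: extracting a parallel submatrix through the public
   interface. Each process supplies the (block-aligned) rows it wants and the
   columns it will own; nloc and rstart are hypothetical names.

     IS  isrow,iscol;
     Mat S;
     ierr = ISCreateStride(((PetscObject)A)->comm,nloc,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,nloc,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/
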
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1861 /*
1862     Not great since it makes two copies of the submatrix: first a SeqBAIJ
1863   in local, then the end result obtained by concatenating the local matrices.
1864   Writing it directly would look much like MatGetSubMatrices_MPIBAIJ().
1865 */
1866 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1867 {
1868   PetscErrorCode ierr;
1869   PetscMPIInt    rank,size;
1870   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1871   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1872   Mat            *local,M,Mreuse;
1873   MatScalar      *vwork,*aa;
1874   MPI_Comm       comm = ((PetscObject)mat)->comm;
1875   Mat_SeqBAIJ    *aij;
1876 
1877 
1878   PetscFunctionBegin;
1879   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1880   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1881 
1882   if (call ==  MAT_REUSE_MATRIX) {
1883     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1884     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1885     local = &Mreuse;
1886     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1887   } else {
1888     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1889     Mreuse = *local;
1890     ierr   = PetscFree(local);CHKERRQ(ierr);
1891   }
1892 
1893   /*
1894       m - number of local rows
1895       n - number of columns (same on all processors)
1896       rstart - first row in new global matrix generated
1897   */
1898   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1899   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1900   m    = m/bs;
1901   n    = n/bs;
1902 
1903   if (call == MAT_INITIAL_MATRIX) {
1904     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1905     ii  = aij->i;
1906     jj  = aij->j;
1907 
1908     /*
1909         Determine the number of non-zeros in the diagonal and off-diagonal
1910         portions of the matrix in order to do correct preallocation
1911     */
1912 
1913     /* first get start and end of "diagonal" columns */
1914     if (csize == PETSC_DECIDE) {
1915       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1916       if (mglobal == n*bs) { /* square matrix */
1917         nlocal = m;
1918       } else {
1919         nlocal = n/size + ((n % size) > rank);
1920       }
1921     } else {
1922       nlocal = csize/bs;
1923     }
1924     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1925     rstart = rend - nlocal;
1926     if (rank == size - 1 && rend != n) {
1927       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1928     }
1929 
1930     /* next, compute all the lengths */
1931     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1932     olens = dlens + m;
1933     for (i=0; i<m; i++) {
1934       jend = ii[i+1] - ii[i];
1935       olen = 0;
1936       dlen = 0;
1937       for (j=0; j<jend; j++) {
1938         if (*jj < rstart || *jj >= rend) olen++;
1939         else dlen++;
1940         jj++;
1941       }
1942       olens[i] = olen;
1943       dlens[i] = dlen;
1944     }
1945     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1946     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1947     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1948     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1949     ierr = PetscFree(dlens);CHKERRQ(ierr);
1950   } else {
1951     PetscInt ml,nl;
1952 
1953     M = *newmat;
1954     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1955     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as requested");
1956     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1957     /*
1958          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1959        rather than the slower MatSetValues().
1960     */
1961     M->was_assembled = PETSC_TRUE;
1962     M->assembled     = PETSC_FALSE;
1963   }
1964   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1965   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1966   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1967   ii  = aij->i;
1968   jj  = aij->j;
1969   aa  = aij->a;
1970   for (i=0; i<m; i++) {
1971     row   = rstart/bs + i;
1972     nz    = ii[i+1] - ii[i];
1973     cwork = jj;     jj += nz;
1974     vwork = aa;     aa += nz;
1975     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1976   }
1977 
1978   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1979   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1980   *newmat = M;
1981 
1982   /* save submatrix used in processor for next request */
1983   if (call ==  MAT_INITIAL_MATRIX) {
1984     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1985     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1986   }
1987 
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 #undef __FUNCT__
1992 #define __FUNCT__ "MatPermute_MPIBAIJ"
1993 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1994 {
1995   MPI_Comm       comm,pcomm;
1996   PetscInt       first,local_size,nrows;
1997   const PetscInt *rows;
1998   PetscMPIInt    size;
1999   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2000   PetscErrorCode ierr;
2001 
2002   PetscFunctionBegin;
2003   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2004   /* make a collective version of 'rowp' */
2005   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2006   if (pcomm==comm) {
2007     crowp = rowp;
2008   } else {
2009     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2010     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2011     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2012     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2013   }
2014   /* collect the global row permutation and invert it */
2015   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2016   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2017   if (pcomm!=comm) {
2018     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2019   }
2020   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2021   /* get the local target indices */
2022   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2023   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2024   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2025   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2026   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2027   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2028   /* the column permutation is so much easier;
2029      make a local version of 'colp' and invert it */
2030   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2031   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2032   if (size==1) {
2033     lcolp = colp;
2034   } else {
2035     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2036     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2037     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2038   }
2039   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2040   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2041   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2042   if (size>1) {
2043     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2044     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2045   }
2046   /* now we just get the submatrix */
2047   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2048   /* clean up */
2049   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2050   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 #undef __FUNCT__
2055 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2056 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2057 {
2058   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2059   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2060 
2061   PetscFunctionBegin;
2062   if (nghosts) { *nghosts = B->nbs;}
2063   if (ghosts) {*ghosts = baij->garray;}
2064   PetscFunctionReturn(0);
2065 }
2066 
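/*
   Illustrative sketch: the ghost list returned above is garray, the global
   block indices of the off-process columns this process references, e.g. for
   creating a matching ghosted vector. Assumes an assembled MPIBAIJ matrix A.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/
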
2067 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2068 
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2071 /*
2072     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2073 */
2074 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2075 {
2076   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2077   PetscErrorCode        ierr;
2078   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2079   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2080   const PetscInt        *is;
2081   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2082   PetscInt              *rowhit,M,cstart,cend,colb;
2083   PetscInt              *columnsforrow,l;
2084   IS                    *isa;
2085   PetscTruth             done,flg;
2086   ISLocalToGlobalMapping map = mat->bmapping;
2087   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype = c->ctype;
2088 
2089   PetscFunctionBegin;
2090   if (!mat->assembled) {
2091     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2092   }
2093   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2094 
2095   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2096 
2097   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2098   M                = mat->rmap->n/bs;
2099   cstart           = mat->cmap->rstart/bs;
2100   cend             = mat->cmap->rend/bs;
2101   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2102   c->N             = mat->cmap->N/bs;
2103   c->m             = mat->rmap->n/bs;
2104   c->rstart        = mat->rmap->rstart/bs;
2105 
2106   c->ncolors       = nis;
2107   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2108   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2109   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2110   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2111   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2112   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2113 
2114   /* Allow access to data structures of local part of matrix */
2115   if (!baij->colmap) {
2116     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2117   }
2118   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2119   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2120 
2121   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2122   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2123 
2124   for (i=0; i<nis; i++) {
2125     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2126     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2127     c->ncolumns[i] = n;
2128     if (n) {
2129       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2130       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2131       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2132     } else {
2133       c->columns[i]  = 0;
2134     }
2135 
2136     if (ctype == IS_COLORING_GLOBAL){
2137       /* Determine the total (parallel) number of columns of this color */
2138       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2139       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2140 
2141       nn   = PetscMPIIntCast(n);
2142       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2143       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2144       if (!nctot) {
2145         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2146       }
2147 
2148       disp[0] = 0;
2149       for (j=1; j<size; j++) {
2150         disp[j] = disp[j-1] + ncolsonproc[j-1];
2151       }
2152 
2153       /* Get complete list of columns for color on each processor */
2154       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2155       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2156       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2157     } else if (ctype == IS_COLORING_GHOSTED){
2158       /* Determine local number of columns of this color on this process, including ghost points */
2159       nctot = n;
2160       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2161       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2162     } else {
2163       SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2164     }
2165 
2166     /*
2167        Mark all rows affected by these columns
2168     */
2169     /* Temporary option to allow for debugging/testing */
2170     flg  = PETSC_FALSE;
2171     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2172     if (!flg) {/*-----------------------------------------------------------------------------*/
2173       /* crude, fast version */
2174       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2175       /* loop over columns*/
2176       for (j=0; j<nctot; j++) {
2177         if (ctype == IS_COLORING_GHOSTED) {
2178           col = ltog[cols[j]];
2179         } else {
2180           col  = cols[j];
2181         }
2182         if (col >= cstart && col < cend) {
2183           /* column is in diagonal block of matrix */
2184           rows = A_cj + A_ci[col-cstart];
2185           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2186         } else {
2187 #if defined (PETSC_USE_CTABLE)
2188           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2189           colb--;
2190 #else
2191           colb = baij->colmap[col] - 1;
2192 #endif
2193           if (colb == -1) {
2194             m = 0;
2195           } else {
2196             colb = colb/bs;
2197             rows = B_cj + B_ci[colb];
2198             m    = B_ci[colb+1] - B_ci[colb];
2199           }
2200         }
2201         /* loop over the rows containing this column, marking them in rowhit */
2202         for (k=0; k<m; k++) {
2203           rowhit[*rows++] = col + 1;
2204         }
2205       }
2206 
2207       /* count the number of hits */
2208       nrows = 0;
2209       for (j=0; j<M; j++) {
2210         if (rowhit[j]) nrows++;
2211       }
2212       c->nrows[i]         = nrows;
2213       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2214       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2215       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2216       nrows = 0;
2217       for (j=0; j<M; j++) {
2218         if (rowhit[j]) {
2219           c->rows[i][nrows]           = j;
2220           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2221           nrows++;
2222         }
2223       }
2224     } else {/*-------------------------------------------------------------------------------*/
2225       /* slow version, using rowhit as a linked list */
2226       PetscInt currentcol,fm,mfm;
2227       rowhit[M] = M;
2228       nrows     = 0;
2229       /* loop over columns*/
2230       for (j=0; j<nctot; j++) {
2231         if (ctype == IS_COLORING_GHOSTED) {
2232           col = ltog[cols[j]];
2233         } else {
2234           col  = cols[j];
2235         }
2236         if (col >= cstart && col < cend) {
2237           /* column is in diagonal block of matrix */
2238           rows = A_cj + A_ci[col-cstart];
2239           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2240         } else {
2241 #if defined (PETSC_USE_CTABLE)
2242           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2243           colb --;
2244 #else
2245           colb = baij->colmap[col] - 1;
2246 #endif
2247           if (colb == -1) {
2248             m = 0;
2249           } else {
2250             colb = colb/bs;
2251             rows = B_cj + B_ci[colb];
2252             m    = B_ci[colb+1] - B_ci[colb];
2253           }
2254         }
2255 
2256         /* loop over the rows containing this column, marking them in rowhit */
2257         fm    = M; /* fm points to first entry in linked list */
2258         for (k=0; k<m; k++) {
2259           currentcol = *rows++;
2260           /* is it already in the list? */
2261           do {
2262             mfm  = fm;
2263             fm   = rowhit[fm];
2264           } while (fm < currentcol);
2265           /* not in list so add it */
2266           if (fm != currentcol) {
2267             nrows++;
2268             columnsforrow[currentcol] = col;
2269             /* next three lines insert new entry into linked list */
2270             rowhit[mfm]               = currentcol;
2271             rowhit[currentcol]        = fm;
2272             fm                        = currentcol;
2273             /* fm points to present position in list since we know the columns are sorted */
2274           } else {
2275             SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2276           }
2277         }
2278       }
2279       c->nrows[i]         = nrows;
2280       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2281       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2282       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2283       /* now store the linked list of rows into c->rows[i] */
2284       nrows = 0;
2285       fm    = rowhit[M];
2286       do {
2287         c->rows[i][nrows]            = fm;
2288         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2289         fm                           = rowhit[fm];
2290       } while (fm < M);
2291     } /* ---------------------------------------------------------------------------------------*/
2292     ierr = PetscFree(cols);CHKERRQ(ierr);
2293   }
2294 
2295   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2296   /*
2297        vscale will contain the on-processor ("diagonal") scalings followed by the off-processor ones
2298   */
2299   if (ctype == IS_COLORING_GLOBAL) {
2300     PetscInt *garray;
2301     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2302     for (i=0; i<baij->B->cmap->n/bs; i++) {
2303       for (j=0; j<bs; j++) {
2304         garray[i*bs+j] = bs*baij->garray[i]+j;
2305       }
2306     }
2307     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2308     ierr = PetscFree(garray);CHKERRQ(ierr);
2309     CHKMEMQ;
2310     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2311     for (k=0; k<c->ncolors; k++) {
2312       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2313       for (l=0; l<c->nrows[k]; l++) {
2314         col = c->columnsforrow[k][l];
2315         if (col >= cstart && col < cend) {
2316           /* column is in diagonal block of matrix */
2317           colb = col - cstart;
2318         } else {
2319           /* column  is in "off-processor" part */
2320 #if defined (PETSC_USE_CTABLE)
2321           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2322           colb --;
2323 #else
2324           colb = baij->colmap[col] - 1;
2325 #endif
2326           colb = colb/bs;
2327           colb += cend - cstart;
2328         }
2329         c->vscaleforrow[k][l] = colb;
2330       }
2331     }
2332   } else if (ctype == IS_COLORING_GHOSTED) {
2333     /* Get gtol mapping */
2334     PetscInt N = mat->cmap->N, *gtol;
2335     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2336     for (i=0; i<N; i++) gtol[i] = -1;
2337     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2338 
2339     c->vscale = 0; /* will be created in MatFDColoringApply() */
2340     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2341     for (k=0; k<c->ncolors; k++) {
2342       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2343       for (l=0; l<c->nrows[k]; l++) {
2344         col = c->columnsforrow[k][l];      /* global column index */
2345         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2346       }
2347     }
2348     ierr = PetscFree(gtol);CHKERRQ(ierr);
2349   }
2350   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2351 
2352   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2353   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2354   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2355   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2356   CHKMEMQ;
2357   PetscFunctionReturn(0);
2358 }
2359 
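/*
   Illustrative sketch: the usual route into the routine above, computing a
   coloring of the Jacobian nonzero structure and building the finite-difference
   coloring context from it. Assumes an assembled Jacobian J with the correct
   nonzero pattern already set.

     ISColoring    iscoloring;
     MatFDColoring fdcoloring;
     ierr = MatGetColoring(J,MATCOLORING_SL,&iscoloring);CHKERRQ(ierr);
     ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
     ierr = ISColoringDestroy(iscoloring);CHKERRQ(ierr);
*/
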
2360 #undef __FUNCT__
2361 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2362 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2363 {
2364   Mat            B;
2365   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2366   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2367   Mat_SeqAIJ     *b;
2368   PetscErrorCode ierr;
2369   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2370   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2371   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2372 
2373   PetscFunctionBegin;
2374   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2375   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2376 
2377   /* ----------------------------------------------------------------
2378      Tell every processor the number of nonzeros per row
2379   */
2380   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2381   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2382     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2383   }
2384   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2385   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2386   displs     = recvcounts + size;
2387   for (i=0; i<size; i++) {
2388     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2389     displs[i]     = A->rmap->range[i]/bs;
2390   }
2391 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2392   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2393 #else
2394   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2395 #endif
2396   /* ---------------------------------------------------------------
2397      Create the sequential matrix of the same type as the local block diagonal
2398   */
2399   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2400   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2401   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2402   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2403   b = (Mat_SeqAIJ *)B->data;
2404 
2405   /*--------------------------------------------------------------------
2406     Copy my part of matrix column indices over
2407   */
2408   sendcount  = ad->nz + bd->nz;
2409   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2410   a_jsendbuf = ad->j;
2411   b_jsendbuf = bd->j;
2412   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2413   cnt        = 0;
2414   for (i=0; i<n; i++) {
2415 
2416     /* put in lower diagonal portion */
2417     m = bd->i[i+1] - bd->i[i];
2418     while (m > 0) {
2419       /* is it above diagonal (in bd (compressed) numbering) */
2420       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2421       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2422       m--;
2423     }
2424 
2425     /* put in diagonal portion */
2426     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2427       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2428     }
2429 
2430     /* put in upper diagonal portion */
2431     while (m-- > 0) {
2432       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2433     }
2434   }
2435   if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2436 
2437   /*--------------------------------------------------------------------
2438     Gather all column indices to all processors
2439   */
2440   for (i=0; i<size; i++) {
2441     recvcounts[i] = 0;
2442     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2443       recvcounts[i] += lens[j];
2444     }
2445   }
2446   displs[0]  = 0;
2447   for (i=1; i<size; i++) {
2448     displs[i] = displs[i-1] + recvcounts[i-1];
2449   }
2450 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2451   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2452 #else
2453   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2454 #endif
2455   /*--------------------------------------------------------------------
2456     Assemble the matrix into usable form (note: numerical values are not yet set)
2457   */
2458   /* set the b->ilen (length of each row) values */
2459   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2460   /* set the b->i indices */
2461   b->i[0] = 0;
2462   for (i=1; i<=A->rmap->N/bs; i++) {
2463     b->i[i] = b->i[i-1] + lens[i-1];
2464   }
2465   ierr = PetscFree(lens);CHKERRQ(ierr);
2466   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2467   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2468   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2469 
2470   if (A->symmetric){
2471     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2472   } else if (A->hermitian) {
2473     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2474   } else if (A->structurally_symmetric) {
2475     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2476   }
2477   *newmat = B;
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatSOR_MPIBAIJ"
2483 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2484 {
2485   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2486   PetscErrorCode ierr;
2487   Vec            bb1 = 0;
2488 
2489   PetscFunctionBegin;
2490   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2491     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2492   }
2493 
2494   if (flag == SOR_APPLY_UPPER) {
2495     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2496     PetscFunctionReturn(0);
2497   }
2498 
2499   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2500     if (flag & SOR_ZERO_INITIAL_GUESS) {
2501       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2502       its--;
2503     }
2504 
2505     while (its--) {
2506       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2507       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2508 
2509       /* update rhs: bb1 = bb - B*x */
2510       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2511       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2512 
2513       /* local sweep */
2514       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2515     }
2516   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2517     if (flag & SOR_ZERO_INITIAL_GUESS) {
2518       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2519       its--;
2520     }
2521     while (its--) {
2522       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2523       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2524 
2525       /* update rhs: bb1 = bb - B*x */
2526       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2527       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2528 
2529       /* local sweep */
2530       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2531     }
2532   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2533     if (flag & SOR_ZERO_INITIAL_GUESS) {
2534       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2535       its--;
2536     }
2537     while (its--) {
2538       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2539       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2540 
2541       /* update rhs: bb1 = bb - B*x */
2542       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2543       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2544 
2545       /* local sweep */
2546       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2547     }
2548   } else {
2549     SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2550   }
2551 
2552   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2553   PetscFunctionReturn(0);
2554 }
2555 
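/*
   A sketch of the iteration implemented above, in block form: with the local
   rows split as [A B], A the diagonal block and B the off-diagonal block, each
   outer sweep computes

       bb1 = bb - B*x_off          (x_off gathered into lvec by the scatter)
       SOR sweep on A*x = bb1      (forward, backward, or symmetric, lits times)

   i.e. a block Jacobi outer iteration with local SOR inner sweeps, rather than
   a true parallel SOR ordering.
*/
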
2556 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2557 
2558 
2559 /* -------------------------------------------------------------------*/
2560 static struct _MatOps MatOps_Values = {
2561        MatSetValues_MPIBAIJ,
2562        MatGetRow_MPIBAIJ,
2563        MatRestoreRow_MPIBAIJ,
2564        MatMult_MPIBAIJ,
2565 /* 4*/ MatMultAdd_MPIBAIJ,
2566        MatMultTranspose_MPIBAIJ,
2567        MatMultTransposeAdd_MPIBAIJ,
2568        0,
2569        0,
2570        0,
2571 /*10*/ 0,
2572        0,
2573        0,
2574        MatSOR_MPIBAIJ,
2575        MatTranspose_MPIBAIJ,
2576 /*15*/ MatGetInfo_MPIBAIJ,
2577        MatEqual_MPIBAIJ,
2578        MatGetDiagonal_MPIBAIJ,
2579        MatDiagonalScale_MPIBAIJ,
2580        MatNorm_MPIBAIJ,
2581 /*20*/ MatAssemblyBegin_MPIBAIJ,
2582        MatAssemblyEnd_MPIBAIJ,
2583        MatSetOption_MPIBAIJ,
2584        MatZeroEntries_MPIBAIJ,
2585 /*24*/ MatZeroRows_MPIBAIJ,
2586        0,
2587        0,
2588        0,
2589        0,
2590 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2591        0,
2592        0,
2593        0,
2594        0,
2595 /*34*/ MatDuplicate_MPIBAIJ,
2596        0,
2597        0,
2598        0,
2599        0,
2600 /*39*/ MatAXPY_MPIBAIJ,
2601        MatGetSubMatrices_MPIBAIJ,
2602        MatIncreaseOverlap_MPIBAIJ,
2603        MatGetValues_MPIBAIJ,
2604        MatCopy_MPIBAIJ,
2605 /*44*/ 0,
2606        MatScale_MPIBAIJ,
2607        0,
2608        0,
2609        0,
2610 /*49*/ MatSetBlockSize_MPIBAIJ,
2611        0,
2612        0,
2613        0,
2614        0,
2615 /*54*/ MatFDColoringCreate_MPIBAIJ,
2616        0,
2617        MatSetUnfactored_MPIBAIJ,
2618        MatPermute_MPIBAIJ,
2619        MatSetValuesBlocked_MPIBAIJ,
2620 /*59*/ MatGetSubMatrix_MPIBAIJ,
2621        MatDestroy_MPIBAIJ,
2622        MatView_MPIBAIJ,
2623        0,
2624        0,
2625 /*64*/ 0,
2626        0,
2627        0,
2628        0,
2629        0,
2630 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2631        0,
2632        0,
2633        0,
2634        0,
2635 /*74*/ 0,
2636        MatFDColoringApply_BAIJ,
2637        0,
2638        0,
2639        0,
2640 /*79*/ 0,
2641        0,
2642        0,
2643        0,
2644        MatLoad_MPIBAIJ,
2645 /*84*/ 0,
2646        0,
2647        0,
2648        0,
2649        0,
2650 /*89*/ 0,
2651        0,
2652        0,
2653        0,
2654        0,
2655 /*94*/ 0,
2656        0,
2657        0,
2658        0,
2659        0,
2660 /*99*/ 0,
2661        0,
2662        0,
2663        0,
2664        0,
2665 /*104*/0,
2666        MatRealPart_MPIBAIJ,
2667        MatImaginaryPart_MPIBAIJ,
2668        0,
2669        0,
2670 /*109*/0,
2671        0,
2672        0,
2673        0,
2674        0,
2675 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2676        0,
2677        MatGetGhosts_MPIBAIJ
2678 };
2679 
2680 EXTERN_C_BEGIN
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2683 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2684 {
2685   PetscFunctionBegin;
2686   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2687   *iscopy = PETSC_FALSE;
2688   PetscFunctionReturn(0);
2689 }
2690 EXTERN_C_END
2691 
2692 EXTERN_C_BEGIN
2693 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2694 EXTERN_C_END
2695 
2696 EXTERN_C_BEGIN
2697 #undef __FUNCT__
2698 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2699 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2700 {
2701   PetscInt       m,rstart,cstart,cend;
2702   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2703   const PetscInt *JJ=0;
2704   PetscScalar    *values=0;
2705   PetscErrorCode ierr;
2706 
2707   PetscFunctionBegin;
2708 
2709   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2710   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2711   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2712   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2713   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2714   m      = B->rmap->n/bs;
2715   rstart = B->rmap->rstart/bs;
2716   cstart = B->cmap->rstart/bs;
2717   cend   = B->cmap->rend/bs;
2718 
2719   if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2720   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2721   for (i=0; i<m; i++) {
2722     nz = ii[i+1] - ii[i];
2723     if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2724     nz_max = PetscMax(nz_max,nz);
2725     JJ  = jj + ii[i];
2726     for (j=0; j<nz; j++) {
2727       if (*JJ >= cstart) break;
2728       JJ++;
2729     }
2730     d = 0;
2731     for (; j<nz; j++) {
2732       if (*JJ++ >= cend) break;
2733       d++;
2734     }
2735     d_nnz[i] = d;
2736     o_nnz[i] = nz - d;
2737   }
2738   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2739   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2740 
2741   values = (PetscScalar*)V;
2742   if (!values) {
2743     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2744     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2745   }
2746   for (i=0; i<m; i++) {
2747     PetscInt          row    = i + rstart;
2748     PetscInt          ncols  = ii[i+1] - ii[i];
2749     const PetscInt    *icols = jj + ii[i];
2750     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2751     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2752   }
2753 
2754   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2755   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2756   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2757 
2758   PetscFunctionReturn(0);
2759 }
2760 EXTERN_C_END
2761 
2762 #undef __FUNCT__
2763 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2764 /*@C
2765    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ
2766    (block compressed row) format, from CSR data given in terms of block rows and block columns.
2767 
2768    Collective on MPI_Comm
2769 
2770    Input Parameters:
2771 +  B - the matrix
.  bs - the block size
2772 .  i - the indices into j for the start of each local block row (starts with zero)
2773 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2774 -  v - optional values in the matrix, one dense bs*bs block per block nonzero
2775 
2776    Level: developer
2777 
2778 .keywords: matrix, baij, block, compressed row, sparse, parallel
2779 
2780 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2781 @*/
2782 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2783 {
2784   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2785 
2786   PetscFunctionBegin;
2787   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2788   if (f) {
2789     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2790   }
2791   PetscFunctionReturn(0);
2792 }
2793 
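/*
   Illustrative sketch (hypothetical 2x2-block data): each process passes its
   local block rows in CSR form; column indices are global block indices and
   must be sorted within each row, and v supplies bs*bs values per block
   nonzero (filled elsewhere in a real code).

     PetscInt    bs   = 2;
     PetscInt    ii[] = {0,2,3};    two local block rows with 2 and 1 block nonzeros
     PetscInt    jj[] = {0,1,1};    sorted global block column indices
     PetscScalar vv[3*2*2];         one dense bs*bs block per block nonzero
     ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);CHKERRQ(ierr);
*/
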
2794 EXTERN_C_BEGIN
2795 #undef __FUNCT__
2796 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2797 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2798 {
2799   Mat_MPIBAIJ    *b;
2800   PetscErrorCode ierr;
2801   PetscInt       i, newbs = PetscAbs(bs);
2802 
2803   PetscFunctionBegin;
2804   if (bs < 0) {
2805     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2806       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2807     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2808     bs   = PetscAbs(bs);
2809   }
2810   if ((d_nnz || o_nnz) && newbs != bs) {
2811     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2812   }
2813   bs = newbs;
2814 
2815 
2816   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2817   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2818   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2819   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2820   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2821 
2822   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2823   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2824   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2825   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2826 
2827   if (d_nnz) {
2828     for (i=0; i<B->rmap->n/bs; i++) {
2829       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2830     }
2831   }
2832   if (o_nnz) {
2833     for (i=0; i<B->rmap->n/bs; i++) {
2834       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2835     }
2836   }
2837 
2838   b = (Mat_MPIBAIJ*)B->data;
2839   b->bs2 = bs*bs;
2840   b->mbs = B->rmap->n/bs;
2841   b->nbs = B->cmap->n/bs;
2842   b->Mbs = B->rmap->N/bs;
2843   b->Nbs = B->cmap->N/bs;
2844 
2845   for (i=0; i<=b->size; i++) {
2846     b->rangebs[i] = B->rmap->range[i]/bs;
2847   }
2848   b->rstartbs = B->rmap->rstart/bs;
2849   b->rendbs   = B->rmap->rend/bs;
2850   b->cstartbs = B->cmap->rstart/bs;
2851   b->cendbs   = B->cmap->rend/bs;
2852 
2853   if (!B->preallocated) {
2854     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2855     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2856     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2857     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2858     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2859     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2860     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2861     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2862     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2863   }
2864 
2865   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2866   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2867   B->preallocated = PETSC_TRUE;
2868   PetscFunctionReturn(0);
2869 }
2870 EXTERN_C_END
2871 
2872 EXTERN_C_BEGIN
2873 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2874 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2875 EXTERN_C_END
2876 
2877 
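/*
    Converts the block structure of an MPIBAIJ matrix into an MPIAdj adjacency
  graph: one vertex per block row, with the diagonal (A) and off-diagonal (B)
  block columns merged in ascending global order via garray, and the diagonal
  block of each block row omitted.
*/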
2878 EXTERN_C_BEGIN
2879 #undef __FUNCT__
2880 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2881 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2882 {
2883   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2884   PetscErrorCode ierr;
2885   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2886   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2887   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2888 
2889   PetscFunctionBegin;
2890   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2891   ii[0] = 0;
2892   CHKMEMQ;
2893   for (i=0; i<M; i++) {
2894     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2895     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2896     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2897     /* remove one from the count if this block row contains its diagonal block */
2898     for (j=id[i]; j<id[i+1]; j++) {
2899       if (jd[j] == i) {ii[i+1]--;break;}
2900     }
2901     CHKMEMQ;
2902   }
2903   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2904   cnt = 0;
2905   for (i=0; i<M; i++) {
2906     for (j=io[i]; j<io[i+1]; j++) {
2907       if (garray[jo[j]] > rstart) break;
2908       jj[cnt++] = garray[jo[j]];
2909       CHKMEMQ;
2910     }
2911     for (k=id[i]; k<id[i+1]; k++) {
2912       if (jd[k] != i) {
2913         jj[cnt++] = rstart + jd[k];
2914         CHKMEMQ;
2915       }
2916     }
2917     for (;j<io[i+1]; j++) {
2918       jj[cnt++] = garray[jo[j]];
2919       CHKMEMQ;
2920     }
2921   }
2922   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2923   PetscFunctionReturn(0);
2924 }
2925 EXTERN_C_END
2926 
2927 /*MC
2928    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2929 
2930    Options Database Keys:
2931 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2932 . -mat_block_size <bs> - set the blocksize used to store the matrix
2933 - -mat_use_hash_table <fact> - use a hash table during matrix assembly, sized using the factor <fact>
2934 
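   For example (illustrative; "ex1" stands for any PETSc program that calls
   MatSetFromOptions()):
.vb
   ./ex1 -mat_type mpibaij -mat_block_size 3
.ve
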
2935   Level: beginner
2936 
2937 .seealso: MatCreateMPIBAIJ()
2938 M*/
2939 
2940 EXTERN_C_BEGIN
2941 #undef __FUNCT__
2942 #define __FUNCT__ "MatCreate_MPIBAIJ"
2943 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2944 {
2945   Mat_MPIBAIJ    *b;
2946   PetscErrorCode ierr;
2947   PetscTruth     flg;
2948 
2949   PetscFunctionBegin;
2950   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2951   B->data = (void*)b;
2952 
2953 
2954   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2955   B->mapping    = 0;
2956   B->assembled  = PETSC_FALSE;
2957 
2958   B->insertmode = NOT_SET_VALUES;
2959   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2960   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2961 
2962   /* build local table of row and column ownerships */
2963   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2964 
2965   /* build cache for off array entries formed */
2966   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2967   b->donotstash  = PETSC_FALSE;
2968   b->colmap      = PETSC_NULL;
2969   b->garray      = PETSC_NULL;
2970   b->roworiented = PETSC_TRUE;
2971 
2972   /* stuff used in block assembly */
2973   b->barray       = 0;
2974 
2975   /* stuff used for matrix vector multiply */
2976   b->lvec         = 0;
2977   b->Mvctx        = 0;
2978 
2979   /* stuff for MatGetRow() */
2980   b->rowindices   = 0;
2981   b->rowvalues    = 0;
2982   b->getrowactive = PETSC_FALSE;
2983 
2984   /* hash table stuff */
2985   b->ht           = 0;
2986   b->hd           = 0;
2987   b->ht_size      = 0;
2988   b->ht_flag      = PETSC_FALSE;
2989   b->ht_fact      = 0;
2990   b->ht_total_ct  = 0;
2991   b->ht_insert_ct = 0;
2992 
2993   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2994     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2995     if (flg) {
2996       PetscReal fact = 1.39;
2997       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2998       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2999       if (fact <= 1.0) fact = 1.39;
3000       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3001       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3002     }
3003   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3004 
3005   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3006                                      "MatConvert_MPIBAIJ_MPIAdj",
3007                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3008   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3009                                      "MatStoreValues_MPIBAIJ",
3010                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3011   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3012                                      "MatRetrieveValues_MPIBAIJ",
3013                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3014   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3015                                      "MatGetDiagonalBlock_MPIBAIJ",
3016                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3017   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3018                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3019                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3020   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3021 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3022 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3023   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3024                                      "MatDiagonalScaleLocal_MPIBAIJ",
3025                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3026   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3027                                      "MatSetHashTableFactor_MPIBAIJ",
3028                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3029   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3030   PetscFunctionReturn(0);
3031 }
3032 EXTERN_C_END
3033 
3034 /*MC
3035    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3036 
3037    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3038    and MATMPIBAIJ otherwise.
3039 
3040    Options Database Keys:
3041 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3042 
3043   Level: beginner
3044 
3045 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3046 M*/
3047 
3048 EXTERN_C_BEGIN
3049 #undef __FUNCT__
3050 #define __FUNCT__ "MatCreate_BAIJ"
3051 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3052 {
3053   PetscErrorCode ierr;
3054   PetscMPIInt    size;
3055 
3056   PetscFunctionBegin;
3057   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3058   if (size == 1) {
3059     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3060   } else {
3061     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3062   }
3063   PetscFunctionReturn(0);
3064 }
3065 EXTERN_C_END
3066 
3067 #undef __FUNCT__
3068 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3069 /*@C
3070    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3071    (block compressed row).  For good matrix assembly performance
3072    the user should preallocate the matrix storage by setting the parameters
3073    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3074    performance can be increased by more than a factor of 50.
3075 
3076    Collective on Mat
3077 
3078    Input Parameters:
3079 +  A - the matrix
3080 .  bs   - size of block
3081 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3082            submatrix  (same for all local rows)
3083 .  d_nnz - array containing the number of block nonzeros in the various block rows
3084            in the diagonal portion of the local submatrix (possibly different for each block
3085            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3086 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3087            submatrix (same for all local rows).
3088 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3089            off-diagonal portion of the local submatrix (possibly different for
3090            each block row) or PETSC_NULL.
3091 
3092    If the *_nnz parameter is given then the *_nz parameter is ignored
3093 
3094    Options Database Keys:
3095 +   -mat_block_size - size of the blocks to use
3096 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly, sized using the factor <fact>
3097 
3098    Notes:
3099    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3100    then it must be used on all processors that share the object for that argument.
3101 
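   A typical call sequence (a minimal sketch; M, N, bs, d_nz and o_nz are
   placeholders chosen by the caller):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
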
3102    Storage Information:
3103    For a square global matrix we define each processor's diagonal portion
3104    to be its local rows and the corresponding columns (a square submatrix);
3105    each processor's off-diagonal portion encompasses the remainder of the
3106    local matrix (a rectangular submatrix).
3107 
3108    The user can specify preallocated storage for the diagonal part of
3109    the local submatrix with either d_nz or d_nnz (not both).  Set
3110    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3111    memory allocation.  Likewise, specify preallocated storage for the
3112    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3113 
3114    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3115    the figure below we depict these three local rows and all columns (0-11).
3116 
3117 .vb
3118            0 1 2 3 4 5 6 7 8 9 10 11
3119           -------------------
3120    row 3  |  o o o d d d o o o o o o
3121    row 4  |  o o o d d d o o o o o o
3122    row 5  |  o o o d d d o o o o o o
3123           -------------------
3124 .ve
3125 
3126    Thus, any entries in the d locations are stored in the d (diagonal)
3127    submatrix, and any entries in the o locations are stored in the
3128    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3129    stored simply in the MATSEQBAIJ format for compressed row storage.
3130 
3131    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3132    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3133    In general, for PDE problems in which most nonzeros are near the diagonal,
3134    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3135    or you will get TERRIBLE performance; see the users' manual chapter on
3136    matrices.
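
   As a worked example using the figure above with bs=3: rows 3-5 form one
   block row; block column 1 (columns 3-5) is the single d block while block
   columns 0, 2 and 3 are o blocks, so d_nz=1 and o_nz=3 for that block row.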
3137 
3138    You can call MatGetInfo() to get information on how effective the preallocation was,
3139    for example via the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3140    You can also run with the option -info and look for messages with the string
3141    malloc in them to see if additional memory allocation was needed.
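
   For example (a minimal sketch):
.vb
   MatInfo info;
   MatGetInfo(A,MAT_LOCAL,&info);
   PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_unneeded %g\n",info.mallocs,info.nz_unneeded);
.ve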
3142 
3143    Level: intermediate
3144 
3145 .keywords: matrix, block, aij, compressed row, sparse, parallel
3146 
3147 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3148 @*/
3149 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3150 {
3151   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3152 
3153   PetscFunctionBegin;
3154   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3155   if (f) {
3156     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3157   }
3158   PetscFunctionReturn(0);
3159 }
3160 
3161 #undef __FUNCT__
3162 #define __FUNCT__ "MatCreateMPIBAIJ"
3163 /*@C
3164    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3165    (block compressed row).  For good matrix assembly performance
3166    the user should preallocate the matrix storage by setting the parameters
3167    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3168    performance can be increased by more than a factor of 50.
3169 
3170    Collective on MPI_Comm
3171 
3172    Input Parameters:
3173 +  comm - MPI communicator
3174 .  bs   - size of block
3175 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3176            This value should be the same as the local size used in creating the
3177            y vector for the matrix-vector product y = Ax.
3178 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
3179            This value should be the same as the local size used in creating the
3180            x vector for the matrix-vector product y = Ax.
3181 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3182 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3183 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3184            submatrix  (same for all local rows)
3185 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3186            in the diagonal portion of the local submatrix (possibly different for each block
3187            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3188 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3189            submatrix (same for all local rows).
3190 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3191            off-diagonal portion of the local submatrix (possibly different for
3192            each block row) or PETSC_NULL.
3193 
3194    Output Parameter:
3195 .  A - the matrix
3196 
3197    Options Database Keys:
3198 +   -mat_block_size - size of the blocks to use
3199 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly, sized using the factor <fact>
3200 
3201    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3202    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3203    [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]
3204 
3205    Notes:
3206    If the *_nnz parameter is given then the *_nz parameter is ignored
3207 
3208    A nonzero block is any block that has 1 or more nonzeros in it
3209 
3210    The user MUST specify either the local or global matrix dimensions
3211    (possibly both).
3212 
3213    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3214    then it must be used on all processors that share the object for that argument.
3215 
3216    Storage Information:
3217    For a square global matrix we define each processor's diagonal portion
3218    to be its local rows and the corresponding columns (a square submatrix);
3219    each processor's off-diagonal portion encompasses the remainder of the
3220    local matrix (a rectangular submatrix).
3221 
3222    The user can specify preallocated storage for the diagonal part of
3223    the local submatrix with either d_nz or d_nnz (not both).  Set
3224    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3225    memory allocation.  Likewise, specify preallocated storage for the
3226    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3227 
3228    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3229    the figure below we depict these three local rows and all columns (0-11).
3230 
3231 .vb
3232            0 1 2 3 4 5 6 7 8 9 10 11
3233           -------------------
3234    row 3  |  o o o d d d o o o o o o
3235    row 4  |  o o o d d d o o o o o o
3236    row 5  |  o o o d d d o o o o o o
3237           -------------------
3238 .ve
3239 
3240    Thus, any entries in the d locations are stored in the d (diagonal)
3241    submatrix, and any entries in the o locations are stored in the
3242    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3243    stored simply in the MATSEQBAIJ format for compressed row storage.
3244 
3245    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3246    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3247    In general, for PDE problems in which most nonzeros are near the diagonal,
3248    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3249    or you will get TERRIBLE performance; see the users' manual chapter on
3250    matrices.
3251 
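   For example (a minimal sketch; bs, M, N, d_nz and o_nz are placeholders
   chosen by the caller):
.vb
   MatCreateMPIBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,N,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
.ve
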
3252    Level: intermediate
3253 
3254 .keywords: matrix, block, aij, compressed row, sparse, parallel
3255 
3256 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3257 @*/
3258 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3259 {
3260   PetscErrorCode ierr;
3261   PetscMPIInt    size;
3262 
3263   PetscFunctionBegin;
3264   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3265   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3266   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3267   if (size > 1) {
3268     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3269     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3270   } else {
3271     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3272     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3273   }
3274   PetscFunctionReturn(0);
3275 }
3276 
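/*
    Duplicates an MPIBAIJ matrix: copies the layouts and block-size
  bookkeeping, the colmap/garray tables and the communication scatter, then
  duplicates the sequential diagonal (A) and off-diagonal (B) pieces, copying
  their numerical values only when cpvalues requests it.
*/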
3277 #undef __FUNCT__
3278 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3279 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3280 {
3281   Mat            mat;
3282   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3283   PetscErrorCode ierr;
3284   PetscInt       len=0;
3285 
3286   PetscFunctionBegin;
3287   *newmat       = 0;
3288   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3289   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3290   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3291   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3292 
3293   mat->factor       = matin->factor;
3294   mat->preallocated = PETSC_TRUE;
3295   mat->assembled    = PETSC_TRUE;
3296   mat->insertmode   = NOT_SET_VALUES;
3297 
3298   a      = (Mat_MPIBAIJ*)mat->data;
3299   mat->rmap->bs  = matin->rmap->bs;
3300   a->bs2   = oldmat->bs2;
3301   a->mbs   = oldmat->mbs;
3302   a->nbs   = oldmat->nbs;
3303   a->Mbs   = oldmat->Mbs;
3304   a->Nbs   = oldmat->Nbs;
3305 
3306   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3307   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3308 
3309   a->size         = oldmat->size;
3310   a->rank         = oldmat->rank;
3311   a->donotstash   = oldmat->donotstash;
3312   a->roworiented  = oldmat->roworiented;
3313   a->rowindices   = 0;
3314   a->rowvalues    = 0;
3315   a->getrowactive = PETSC_FALSE;
3316   a->barray       = 0;
3317   a->rstartbs     = oldmat->rstartbs;
3318   a->rendbs       = oldmat->rendbs;
3319   a->cstartbs     = oldmat->cstartbs;
3320   a->cendbs       = oldmat->cendbs;
3321 
3322   /* hash table stuff */
3323   a->ht           = 0;
3324   a->hd           = 0;
3325   a->ht_size      = 0;
3326   a->ht_flag      = oldmat->ht_flag;
3327   a->ht_fact      = oldmat->ht_fact;
3328   a->ht_total_ct  = 0;
3329   a->ht_insert_ct = 0;
3330 
3331   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3332   if (oldmat->colmap) {
3333 #if defined (PETSC_USE_CTABLE)
3334   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3335 #else
3336   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3337   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3338   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3339 #endif
3340   } else a->colmap = 0;
3341 
3342   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3343     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3344     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3345     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3346   } else a->garray = 0;
3347 
3348   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3349   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3350   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3351   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3352   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3353 
3354   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3355   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3356   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3357   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3358   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3359   *newmat = mat;
3360 
3361   PetscFunctionReturn(0);
3362 }
3363 
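/*
    Loads a matrix from the PETSc binary format: a four-entry header (whose
  first entry must be MAT_FILE_COOKIE and whose next two entries give the
  global dimensions M and N), followed by the per-row nonzero counts, the
  column indices, and the numerical values.  Process 0 reads the file and
  ships each process its piece; if M is not divisible by the block size,
  padding rows with a single 1.0 on the diagonal are appended.
*/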
3364 #undef __FUNCT__
3365 #define __FUNCT__ "MatLoad_MPIBAIJ"
3366 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3367 {
3368   Mat            A;
3369   PetscErrorCode ierr;
3370   int            fd;
3371   PetscInt       i,nz,j,rstart,rend;
3372   PetscScalar    *vals,*buf;
3373   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3374   MPI_Status     status;
3375   PetscMPIInt    rank,size,maxnz;
3376   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3377   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3378   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3379   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3380   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3381   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3382 
3383   PetscFunctionBegin;
3384   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3385     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3386   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3387 
3388   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3389   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3390   if (!rank) {
3391     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3392     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3393     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3394   }
3395 
3396   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3397   M = header[1]; N = header[2];
3398 
3399   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3400 
3401   /*
3402      This code adds extra rows to make sure the number of rows is
3403      divisible by the blocksize
3404   */
3405   Mbs        = M/bs;
3406   extra_rows = bs - M + bs*Mbs;
3407   if (extra_rows == bs) extra_rows = 0;
3408   else                  Mbs++;
3409   if (extra_rows && !rank) {
3410     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3411   }
3412 
3413   /* determine ownership of all rows */
3414   mbs        = Mbs/size + ((Mbs % size) > rank);
3415   m          = mbs*bs;
3416   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3417   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3418 
3419   /* process 0 needs enough room for the process with the most rows */
3420   if (!rank) {
3421     mmax = rowners[1];
3422     for (i=2; i<size; i++) {
3423       mmax = PetscMax(mmax,rowners[i]);
3424     }
3425     mmax*=bs;
3426   } else mmax = m;
3427 
3428   rowners[0] = 0;
3429   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3430   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3431   rstart = rowners[rank];
3432   rend   = rowners[rank+1];
3433 
3434   /* distribute row lengths to all processors */
3435   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3436   if (!rank) {
3437     mend = m;
3438     if (size == 1) mend = mend - extra_rows;
3439     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3440     for (j=mend; j<m; j++) locrowlens[j] = 1;
3441     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3442     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3443     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3444     for (j=0; j<m; j++) {
3445       procsnz[0] += locrowlens[j];
3446     }
3447     for (i=1; i<size; i++) {
3448       mend = browners[i+1] - browners[i];
3449       if (i == size-1) mend = mend - extra_rows;
3450       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3451       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3452       /* calculate the number of nonzeros on each processor */
3453       for (j=0; j<browners[i+1]-browners[i]; j++) {
3454         procsnz[i] += rowlengths[j];
3455       }
3456       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3457     }
3458     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3459   } else {
3460     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3461   }
3462 
3463   if (!rank) {
3464     /* determine max buffer needed and allocate it */
3465     maxnz = procsnz[0];
3466     for (i=1; i<size; i++) {
3467       maxnz = PetscMax(maxnz,procsnz[i]);
3468     }
3469     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3470 
3471     /* read in my part of the matrix column indices  */
3472     nz     = procsnz[0];
3473     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3474     mycols = ibuf;
3475     if (size == 1)  nz -= extra_rows;
3476     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3477     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3478 
3479     /* read in everyone else's column indices (except the last proc's) and ship them off */
3480     for (i=1; i<size-1; i++) {
3481       nz   = procsnz[i];
3482       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3483       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3484     }
3485     /* read in the column indices for the last proc */
3486     if (size != 1) {
3487       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3488       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3489       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3490       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3491     }
3492     ierr = PetscFree(cols);CHKERRQ(ierr);
3493   } else {
3494     /* determine buffer space needed for message */
3495     nz = 0;
3496     for (i=0; i<m; i++) {
3497       nz += locrowlens[i];
3498     }
3499     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3500     mycols = ibuf;
3501     /* receive message of column indices */
3502     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3503     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3504     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3505   }
3506 
3507   /* loop over local rows, determining number of off diagonal entries */
3508   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3509   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3510   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3511   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3512   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3513   rowcount = 0; nzcount = 0;
3514   for (i=0; i<mbs; i++) {
3515     dcount  = 0;
3516     odcount = 0;
3517     for (j=0; j<bs; j++) {
3518       kmax = locrowlens[rowcount];
3519       for (k=0; k<kmax; k++) {
3520         tmp = mycols[nzcount++]/bs;
3521         if (!mask[tmp]) {
3522           mask[tmp] = 1;
3523           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3524           else masked1[dcount++] = tmp;
3525         }
3526       }
3527       rowcount++;
3528     }
3529 
3530     dlens[i]  = dcount;
3531     odlens[i] = odcount;
3532 
3533     /* zero out the mask elements we set */
3534     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3535     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3536   }
3537 
3538   /* create our matrix */
3539   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3540   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3541   ierr = MatSetType(A,type);CHKERRQ(ierr);
3542   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3543 
3544   if (!rank) {
3545     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3546     /* read in my part of the matrix numerical values  */
3547     nz = procsnz[0];
3548     vals = buf;
3549     mycols = ibuf;
3550     if (size == 1)  nz -= extra_rows;
3551     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3552     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3553 
3554     /* insert into matrix */
3555     jj      = rstart*bs;
3556     for (i=0; i<m; i++) {
3557       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3558       mycols += locrowlens[i];
3559       vals   += locrowlens[i];
3560       jj++;
3561     }
3562     /* read in other processors (except the last one) and ship out */
3563     for (i=1; i<size-1; i++) {
3564       nz   = procsnz[i];
3565       vals = buf;
3566       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3567       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3568     }
3569     /* the last proc */
3570     if (size != 1){
3571       nz   = procsnz[i] - extra_rows;
3572       vals = buf;
3573       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3574       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3575       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3576     }
3577     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3578   } else {
3579     /* receive numeric values */
3580     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3581 
3582     /* receive message of values */
3583     vals   = buf;
3584     mycols = ibuf;
3585     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3586     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3587     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3588 
3589     /* insert into matrix */
3590     jj      = rstart*bs;
3591     for (i=0; i<m; i++) {
3592       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3593       mycols += locrowlens[i];
3594       vals   += locrowlens[i];
3595       jj++;
3596     }
3597   }
3598   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3599   ierr = PetscFree(buf);CHKERRQ(ierr);
3600   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3601   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3602   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3603   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3604   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3605   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3606 
3607   *newmat = A;
3608   PetscFunctionReturn(0);
3609 }
3610 
3611 #undef __FUNCT__
3612 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3613 /*@
3614    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
3615 
3616    Input Parameters:
3617 .  mat  - the matrix
3618 .  fact - factor used to size the hash table (the options-database default is 1.39)
3619 
3620    Collective on Mat
3621 
3622    Level: advanced
3623 
3624   Notes:
3625    This can also be set by the command line option: -mat_use_hash_table <fact>
3626 
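   For example (mirroring what MatCreate_MPIBAIJ itself does when
   -mat_use_hash_table is given):
.vb
   MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(mat,1.39);
.ve
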
3627 .keywords: matrix, hashtable, factor, HT
3628 
3629 .seealso: MatSetOption()
3630 @*/
3631 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3632 {
3633   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3634 
3635   PetscFunctionBegin;
3636   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3637   if (f) {
3638     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3639   }
3640   PetscFunctionReturn(0);
3641 }
3642 
3643 EXTERN_C_BEGIN
3644 #undef __FUNCT__
3645 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3646 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3647 {
3648   Mat_MPIBAIJ *baij;
3649 
3650   PetscFunctionBegin;
3651   baij = (Mat_MPIBAIJ*)mat->data;
3652   baij->ht_fact = fact;
3653   PetscFunctionReturn(0);
3654 }
3655 EXTERN_C_END
3656 
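/*
    Gives access to the pieces of an MPIBAIJ matrix: Ad is the sequential
  diagonal part, Ao the sequential off-diagonal part, and colmap the garray
  mapping from Ao's local block columns to global block columns.
*/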
3657 #undef __FUNCT__
3658 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3659 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3660 {
3661   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3662   PetscFunctionBegin;
3663   *Ad     = a->A;
3664   *Ao     = a->B;
3665   *colmap = a->garray;
3666   PetscFunctionReturn(0);
3667 }
3668 
3669 /*
3670     Special version for direct calls from Fortran (to eliminate two function call overheads)
3671 */
3672 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3673 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3674 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3675 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3676 #endif
3677 
3678 #undef __FUNCT__
3679 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3680 /*@C
3681   MatMPIBAIJSetValuesBlocked - Direct Fortran interface used in place of a call to MatSetValuesBlocked()
3682 
3683   Collective on Mat
3684 
3685   Input Parameters:
3686 + mat - the matrix
3687 . min - number of input rows
3688 . im - input rows
3689 . nin - number of input columns
3690 . in - input columns
3691 . v - numerical values input
3692 - addvin - INSERT_VALUES or ADD_VALUES
3693 
3694   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code un-reuse.
3695 
3696   Level: advanced
3697 
3698 .seealso:   MatSetValuesBlocked()
3699 @*/
3700 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3701 {
3702   /* convert input arguments to C version */
3703   Mat             mat = *matin;
3704   PetscInt        m = *min, n = *nin;
3705   InsertMode      addv = *addvin;
3706 
3707   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3708   const MatScalar *value;
3709   MatScalar       *barray=baij->barray;
3710   PetscTruth      roworiented = baij->roworiented;
3711   PetscErrorCode  ierr;
3712   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3713   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3714   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3715 
3716   PetscFunctionBegin;
3717   /* tasks normally handled by MatSetValuesBlocked() */
3718   if (mat->insertmode == NOT_SET_VALUES) {
3719     mat->insertmode = addv;
3720   }
3721 #if defined(PETSC_USE_DEBUG)
3722   else if (mat->insertmode != addv) {
3723     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3724   }
3725   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3726 #endif
3727   if (mat->assembled) {
3728     mat->was_assembled = PETSC_TRUE;
3729     mat->assembled     = PETSC_FALSE;
3730   }
3731   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3732 
3733 
3734   if (!barray) {
3735     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3736     baij->barray = barray;
3737   }
3738 
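  /* copying a bs x bs block out of v requires a stride between block rows:
     after the bs entries of one row of a block, stepval scalars skip over
     the remaining n-1 (or m-1) blocks to reach the next row of that block */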
3739   if (roworiented) {
3740     stepval = (n-1)*bs;
3741   } else {
3742     stepval = (m-1)*bs;
3743   }
3744   for (i=0; i<m; i++) {
3745     if (im[i] < 0) continue;
3746 #if defined(PETSC_USE_DEBUG)
3747     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3748 #endif
3749     if (im[i] >= rstart && im[i] < rend) {
3750       row = im[i] - rstart;
3751       for (j=0; j<n; j++) {
3752         /* if there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
3753         if ((roworiented) && (n == 1)) {
3754           barray = (MatScalar*)v + i*bs2;
3755         } else if((!roworiented) && (m == 1)) {
3756           barray = (MatScalar*)v + j*bs2;
3757         } else { /* Here a copy is required */
3758           if (roworiented) {
3759             value = v + i*(stepval+bs)*bs + j*bs;
3760           } else {
3761             value = v + j*(stepval+bs)*bs + i*bs;
3762           }
3763           for (ii=0; ii<bs; ii++,value+=stepval) {
3764             for (jj=0; jj<bs; jj++) {
3765               *barray++  = *value++;
3766             }
3767           }
3768           barray -=bs2;
3769         }
3770 
3771         if (in[j] >= cstart && in[j] < cend){
3772           col  = in[j] - cstart;
3773           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3774         }
3775         else if (in[j] < 0) continue;
3776 #if defined(PETSC_USE_DEBUG)
3777         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3778 #endif
3779         else {
3780           if (mat->was_assembled) {
3781             if (!baij->colmap) {
3782               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3783             }
3784 
3785 #if defined(PETSC_USE_DEBUG)
3786 #if defined (PETSC_USE_CTABLE)
3787             { PetscInt data;
3788               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3789               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3790             }
3791 #else
3792             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3793 #endif
3794 #endif
3795 #if defined (PETSC_USE_CTABLE)
3796 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3797             col  = (col - 1)/bs;
3798 #else
3799             col = (baij->colmap[in[j]] - 1)/bs;
3800 #endif
3801             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3802               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3803               col =  in[j];
3804             }
3805           }
3806           else col = in[j];
3807           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3808         }
3809       }
3810     } else {
3811       if (!baij->donotstash) {
3812         if (roworiented) {
3813           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3814         } else {
3815           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3816         }
3817       }
3818     }
3819   }
3820 
3821   /* task normally handled by MatSetValuesBlocked() */
3822   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3823   PetscFunctionReturn(0);
3824 }
3825