/* xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 6ba06ab77c48d2bbbcba7e829ffcd04868efd3d5) */
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
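
/*
   Usage sketch (illustrative only, not part of the library): after assembly,
   the routine above is reached through the generic interface, e.g.

       Vec      rmax;
       PetscInt *cols;
       ierr = MatGetVecs(A,PETSC_NULL,&rmax);CHKERRQ(ierr);
       ierr = PetscMalloc(m*sizeof(PetscInt),&cols);CHKERRQ(ierr);
       ierr = MatGetRowMaxAbs(A,rmax,cols);CHKERRQ(ierr);

   where m is the number of local rows.  The code combines the row maxima of
   the diagonal block a->A with those of the off-diagonal block a->B, and
   translates block-local column indices to global ones via a->garray.
*/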

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the number of block columns of the global matrix.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
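
/*
   Lookup sketch (illustrative only): entries are stored shifted by one so
   that 0 can mean "block column not present locally".  Given a global point
   column gcol, the local point column in baij->B is recovered as

     #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = baij->colmap[gcol/bs] - 1;
     #endif
       if (lcol >= 0) lcol += gcol % bs;      (lcol < 0 means not present)

   which is exactly the pattern used by MatSetValues_MPIBAIJ() below.
*/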

#define CHUNKSIZE  10

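/*
   The two macros below are inlined fast paths for inserting one scalar into
   the diagonal (A) or off-diagonal (B) sequential BAIJ block.  Both narrow
   the search range with a short binary search on the block-column array rp,
   finish with a linear scan, and either update the existing bs x bs block in
   place or (when allowed by ->nonew) reallocate the row and shift the later
   entries up to make room for a new block.
*/
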
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap  = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap  = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ; \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
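
/*
   Note on off-process rows (sketch, not library documentation): values whose
   row is not owned locally are buffered in mat->stash by the branch above and
   are communicated to their owners during MatAssemblyBegin()/MatAssemblyEnd(),
   so the usual pattern is simply

       ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
       ...
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   regardless of which process owns row grow (unless
   MAT_IGNORE_OFF_PROC_ENTRIES has been set).
*/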

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), the block can be used in place and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
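
/*
   Usage sketch (illustrative only): indices passed to the blocked routine are
   block indices, and v holds a full bs x bs block per (row,column) pair, e.g.
   for bs = 2,

       PetscScalar vals[4] = {1.0,2.0,3.0,4.0};   (one row-oriented 2x2 block)
       ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);

   inserts the block at point rows 2*brow..2*brow+1 and point columns
   2*bcol..2*bcol+1 of the global matrix.
*/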

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
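
/*
   HASH() is multiplicative hashing with the golden-ratio constant: the key is
   multiplied by 0.6180339887, the integer part is discarded, and the
   fractional part is scaled by the table size.  Worked example (sketch): with
   size = 11 and key = 7, 7*0.6180339887 = 4.3262..., so the slot is
   (PetscInt)(11*0.3262...) = 3.  Collisions are resolved by the linear
   probing loops in the routines below.
*/
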
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
663       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
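
/*
   For reference, the norms computed above (standard definitions):

       Frobenius:  ||A||_F   = sqrt( sum_ij |a_ij|^2 )
       1-norm:     ||A||_1   = max_j sum_i |a_ij|     (maximum column sum)
       inf-norm:   ||A||_inf = max_i sum_j |a_ij|     (maximum row sum)

   The local partial sums are combined across processes with MPI_SUM
   (Frobenius, 1-norm) or MPI_MAX (infinity norm).
*/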

/*
  Creates the hash table and fills it in.
  The table is created only once; if new entries
  need to be added to the matrix, the hash table
  has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for the Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block stash.  Since the values are stashed column-oriented,
       temporarily set the row-oriented flags to PETSC_FALSE, and restore the
       original flags after the MatSetValues() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, so we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
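
/*
   Sketch of the algorithm above: with the local rows of A split into the
   diagonal block a->A and the off-diagonal block a->B (which acts on ghost
   values gathered into a->lvec), the product is

       y = a->A * x_local + a->B * x_ghost,

   and the forward scatter of the ghost values is overlapped with the local
   (diagonal-block) multiply.
*/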

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
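
/*
   Transpose product sketch: writing P for the forward ghost-gather scatter
   (x_ghost = P x), the routine above computes

       y = a->A^T * x_local + P^T * (a->B^T * x_local),

   i.e. the local transpose multiplies plus a reverse scatter-add of the
   off-diagonal contributions back to their owning processes.
*/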

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is
  the diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
1395 
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1398 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   switch (op) {
1405   case MAT_NEW_NONZERO_LOCATIONS:
1406   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1407   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1408   case MAT_KEEP_NONZERO_PATTERN:
1409   case MAT_NEW_NONZERO_LOCATION_ERR:
1410     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1411     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1412     break;
1413   case MAT_ROW_ORIENTED:
1414     a->roworiented = flg;
1415     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1416     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1417     break;
1418   case MAT_NEW_DIAGONALS:
1419     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1420     break;
1421   case MAT_IGNORE_OFF_PROC_ENTRIES:
1422     a->donotstash = flg;
1423     break;
1424   case MAT_USE_HASH_TABLE:
1425     a->ht_flag = flg;
1426     break;
1427   case MAT_SYMMETRIC:
1428   case MAT_STRUCTURALLY_SYMMETRIC:
1429   case MAT_HERMITIAN:
1430   case MAT_SYMMETRY_ETERNAL:
1431     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1432     break;
1433   default:
1434     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1435   }
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1441 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1442 {
1443   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1444   Mat_SeqBAIJ    *Aloc;
1445   Mat            B;
1446   PetscErrorCode ierr;
1447   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1448   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1449   MatScalar      *a;
1450 
1451   PetscFunctionBegin;
1452   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place transpose");
1453   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1454     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1455     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1456     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1457     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1458   } else {
1459     B = *matout;
1460   }
1461 
1462   /* copy over the A part */
1463   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1464   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1465   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1466 
1467   for (i=0; i<mbs; i++) {
1468     rvals[0] = bs*(baij->rstartbs + i);
1469     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1470     for (j=ai[i]; j<ai[i+1]; j++) {
1471       col = (baij->cstartbs+aj[j])*bs;
1472       for (k=0; k<bs; k++) {
1473         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1474         col++; a += bs;
1475       }
1476     }
1477   }
1478   /* copy over the B part */
1479   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1480   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1481   for (i=0; i<mbs; i++) {
1482     rvals[0] = bs*(baij->rstartbs + i);
1483     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1484     for (j=ai[i]; j<ai[i+1]; j++) {
1485       col = baij->garray[aj[j]]*bs;
1486       for (k=0; k<bs; k++) {
1487         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1488         col++; a += bs;
1489       }
1490     }
1491   }
1492   ierr = PetscFree(rvals);CHKERRQ(ierr);
1493   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1494   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1495 
1496   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1497     *matout = B;
1498   } else {
1499     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1506 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1507 {
1508   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1509   Mat            a = baij->A,b = baij->B;
1510   PetscErrorCode ierr;
1511   PetscInt       s1,s2,s3;
1512 
1513   PetscFunctionBegin;
1514   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1515   if (rr) {
1516     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1517     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1518     /* Overlap communication with computation. */
1519     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520   }
1521   if (ll) {
1522     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1523     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1524     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1525   }
1526   /* scale the diagonal block */
1527   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1528 
1529   if (rr) {
1530     /* Do a scatter end and then right scale the off-diagonal block */
1531     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1532     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1533   }
1534 
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
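/*
    Rows to be zeroed may be listed on any process, so the row indices are first
   routed to their owners: message counts come from PetscMaxSum() and the row
   numbers then travel by nonblocking sends/receives before the local rows are
   actually zeroed.
*/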
1540 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1541 {
1542   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1543   PetscErrorCode ierr;
1544   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1545   PetscInt       i,*owners = A->rmap->range;
1546   PetscInt       *nprocs,j,idx,nsends,row;
1547   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1548   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1549   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1550   MPI_Comm       comm = ((PetscObject)A)->comm;
1551   MPI_Request    *send_waits,*recv_waits;
1552   MPI_Status     recv_status,*send_status;
1553 #if defined(PETSC_DEBUG)
1554   PetscTruth     found = PETSC_FALSE;
1555 #endif
1556 
1557   PetscFunctionBegin;
1558   /* first count the number of contributors to each process */
1559   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1560   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1561   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
1562   j = 0;
1563   for (i=0; i<N; i++) {
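    /* rows[] need not be sorted: restart the owner search from process 0 whenever the indices decrease */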
1564     if (lastidx > (idx = rows[i])) j = 0;
1565     lastidx = idx;
1566     for (; j<size; j++) {
1567       if (idx >= owners[j] && idx < owners[j+1]) {
1568         nprocs[2*j]++;
1569         nprocs[2*j+1] = 1;
1570         owner[i] = j;
1571 #if defined(PETSC_DEBUG)
1572         found = PETSC_TRUE;
1573 #endif
1574         break;
1575       }
1576     }
1577 #if defined(PETSC_DEBUG)
1578     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1579     found = PETSC_FALSE;
1580 #endif
1581   }
1582   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1583 
1584   /* inform other processors of number of messages and max length*/
1585   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1586 
1587   /* post receives:   */
1588   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1589   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1590   for (i=0; i<nrecvs; i++) {
1591     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1592   }
1593 
1594   /* do sends:
1595      1) starts[i] gives the starting index in svalues for stuff going to
1596      the ith processor
1597   */
1598   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1599   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1600   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1601   starts[0]  = 0;
1602   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1603   for (i=0; i<N; i++) {
1604     svalues[starts[owner[i]]++] = rows[i];
1605   }
1606 
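  /* starts[] was consumed while packing svalues; rebuild the prefix sums so starts[i] again points at the first entry destined for process i */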
1607   starts[0] = 0;
1608   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1609   count = 0;
1610   for (i=0; i<size; i++) {
1611     if (nprocs[2*i+1]) {
1612       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1613     }
1614   }
1615   ierr = PetscFree(starts);CHKERRQ(ierr);
1616 
1617   base = owners[rank];
1618 
1619   /*  wait on receives */
1620   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1621   count  = nrecvs;
1622   slen = 0;
1623   while (count) {
1624     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1625     /* unpack receives into our local space */
1626     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1627     source[imdex]  = recv_status.MPI_SOURCE;
1628     lens[imdex]    = n;
1629     slen          += n;
1630     count--;
1631   }
1632   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1633 
1634   /* move the data into the send scatter */
1635   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1636   count = 0;
1637   for (i=0; i<nrecvs; i++) {
1638     values = rvalues + i*nmax;
1639     for (j=0; j<lens[i]; j++) {
1640       lrows[count++] = values[j] - base;
1641     }
1642   }
1643   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1644   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1645   ierr = PetscFree(owner);CHKERRQ(ierr);
1646   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1647 
1648   /* actually zap the local rows */
1649   /*
1650         Zero the required rows. If the "diagonal block" of the matrix
1651      is square and the user wishes to set the diagonal, we use separate
1652      code so that MatSetValues() is not called for each diagonal entry,
1653      which would allocate new memory each time, triggering many mallocs
1654      and slowing things down.
1655   */
1656   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1657   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1658   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1659     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1660   } else if (diag != 0.0) {
1661     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1662     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1663        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1664     for (i=0; i<slen; i++) {
1665       row  = lrows[i] + rstart_bs;
1666       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1667     }
1668     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   } else {
1671     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1672   }
1673 
1674   ierr = PetscFree(lrows);CHKERRQ(ierr);
1675 
1676   /* wait on sends */
1677   if (nsends) {
1678     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1679     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1680     ierr = PetscFree(send_status);CHKERRQ(ierr);
1681   }
1682   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1683   ierr = PetscFree(svalues);CHKERRQ(ierr);
1684 
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1690 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1691 {
1692   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatEqual_MPIBAIJ"
1704 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1705 {
1706   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1707   Mat            a,b,c,d;
1708   PetscTruth     flg;
1709   PetscErrorCode ierr;
1710 
1711   PetscFunctionBegin;
1712   a = matA->A; b = matA->B;
1713   c = matB->A; d = matB->B;
1714 
1715   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1716   if (flg) {
1717     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1718   }
1719   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatCopy_MPIBAIJ"
1725 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1726 {
1727   PetscErrorCode ierr;
1728   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1729   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1730 
1731   PetscFunctionBegin;
1732   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1733   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1734     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1735   } else {
1736     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1737     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1738   }
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1744 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1745 {
1746   PetscErrorCode ierr;
1747 
1748   PetscFunctionBegin;
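  /* the negative block size marks it as a default that MatMPIBAIJSetPreallocation() may override from the options database (-mat_block_size) */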
1749   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1755 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1756 {
1757   PetscErrorCode ierr;
1758   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1759   PetscBLASInt   bnz,one=1;
1760   Mat_SeqBAIJ    *x,*y;
1761 
1762   PetscFunctionBegin;
1763   if (str == SAME_NONZERO_PATTERN) {
1764     PetscScalar alpha = a;
1765     x = (Mat_SeqBAIJ *)xx->A->data;
1766     y = (Mat_SeqBAIJ *)yy->A->data;
1767     bnz = PetscBLASIntCast(x->nz);
1768     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1769     x = (Mat_SeqBAIJ *)xx->B->data;
1770     y = (Mat_SeqBAIJ *)yy->B->data;
1771     bnz = PetscBLASIntCast(x->nz);
1772     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1773   } else {
1774     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1775   }
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1781 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1782 {
1783   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1784   PetscInt rbs,cbs;
1785   PetscErrorCode ierr;
1786 
1787   PetscFunctionBegin;
1788   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1789   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1790   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1791   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1792   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1793   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1799 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1800 {
1801   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1802   PetscErrorCode ierr;
1803 
1804   PetscFunctionBegin;
1805   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1806   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1812 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1813 {
1814   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1815   PetscErrorCode ierr;
1816 
1817   PetscFunctionBegin;
1818   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1819   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1825 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1826 {
1827   PetscErrorCode ierr;
1828   IS             iscol_local;
1829   PetscInt       csize;
1830 
1831   PetscFunctionBegin;
1832   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1833   if (call == MAT_REUSE_MATRIX) {
1834     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1835     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1836   } else {
1837     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1838   }
1839   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1840   if (call == MAT_INITIAL_MATRIX) {
1841     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1842     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1843   }
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
1849 /*
1850     Not great since it makes two copies of the submatrix: first a SeqBAIJ
1851   on each process, and then the final result obtained by concatenating the
1852   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
1853 */
1854 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1855 {
1856   PetscErrorCode ierr;
1857   PetscMPIInt    rank,size;
1858   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1859   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1860   Mat            *local,M,Mreuse;
1861   MatScalar      *vwork,*aa;
1862   MPI_Comm       comm = ((PetscObject)mat)->comm;
1863   Mat_SeqBAIJ    *aij;
1864 
1866   PetscFunctionBegin;
1867   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1868   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1869 
1870   if (call ==  MAT_REUSE_MATRIX) {
1871     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1872     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1873     local = &Mreuse;
1874     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1875   } else {
1876     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1877     Mreuse = *local;
1878     ierr   = PetscFree(local);CHKERRQ(ierr);
1879   }
1880 
1881   /*
1882       m - number of local rows
1883       n - number of columns (same on all processors)
1884       rstart - first row in new global matrix generated
1885   */
1886   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1887   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1888   m    = m/bs;
1889   n    = n/bs;
1890 
1891   if (call == MAT_INITIAL_MATRIX) {
1892     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1893     ii  = aij->i;
1894     jj  = aij->j;
1895 
1896     /*
1897         Determine the number of non-zeros in the diagonal and off-diagonal
1898         portions of the matrix in order to do correct preallocation
1899     */
1900 
1901     /* first get start and end of "diagonal" columns */
1902     if (csize == PETSC_DECIDE) {
1903       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1904       if (mglobal == n*bs) { /* square matrix */
1905         nlocal = m;
1906       } else {
1907         nlocal = n/size + ((n % size) > rank);
1908       }
1909     } else {
1910       nlocal = csize/bs;
1911     }
1912     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1913     rstart = rend - nlocal;
1914     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1915 
1916     /* next, compute all the lengths */
1917     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1918     olens = dlens + m;
1919     for (i=0; i<m; i++) {
1920       jend = ii[i+1] - ii[i];
1921       olen = 0;
1922       dlen = 0;
1923       for (j=0; j<jend; j++) {
1924         if (*jj < rstart || *jj >= rend) olen++;
1925         else dlen++;
1926         jj++;
1927       }
1928       olens[i] = olen;
1929       dlens[i] = dlen;
1930     }
1931     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1932     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1933     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1934     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1935     ierr = PetscFree(dlens);CHKERRQ(ierr);
1936   } else {
1937     PetscInt ml,nl;
1938 
1939     M = *newmat;
1940     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1941     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1942     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1943     /*
1944          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1945        rather than the slower MatSetValues().
1946     */
1947     M->was_assembled = PETSC_TRUE;
1948     M->assembled     = PETSC_FALSE;
1949   }
1950   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1951   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1952   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1953   ii  = aij->i;
1954   jj  = aij->j;
1955   aa  = aij->a;
1956   for (i=0; i<m; i++) {
1957     row   = rstart/bs + i;
1958     nz    = ii[i+1] - ii[i];
1959     cwork = jj;     jj += nz;
1960     vwork = aa;     aa += nz;
1961     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1962   }
1963 
1964   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1966   *newmat = M;
1967 
1968   /* save submatrix used in processor for next request */
1969   if (call ==  MAT_INITIAL_MATRIX) {
1970     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1971     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1972   }
1973 
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 #undef __FUNCT__
1978 #define __FUNCT__ "MatPermute_MPIBAIJ"
1979 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1980 {
1981   MPI_Comm       comm,pcomm;
1982   PetscInt       first,local_size,nrows;
1983   const PetscInt *rows;
1984   PetscMPIInt    size;
1985   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1986   PetscErrorCode ierr;
1987 
1988   PetscFunctionBegin;
1989   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1990   /* make a collective version of 'rowp' */
1991   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1992   if (pcomm==comm) {
1993     crowp = rowp;
1994   } else {
1995     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1996     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1997     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1998     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1999   }
2000   /* collect the global row permutation and invert it */
2001   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2002   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2003   if (pcomm!=comm) {
2004     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2005   }
2006   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2007   /* get the local target indices */
2008   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2009   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2010   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2011   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2012   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2013   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2014   /* the column permutation is so much easier;
2015      make a local version of 'colp' and invert it */
2016   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2017   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2018   if (size==1) {
2019     lcolp = colp;
2020   } else {
2021     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2022     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2023     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2024   }
2025   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2026   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2027   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2028   if (size>1) {
2029     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2030     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2031   }
2032   /* now we just get the submatrix */
2033   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2034   /* clean up */
2035   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2036   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 #undef __FUNCT__
2041 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2042 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2043 {
2044   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2045   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2046 
2047   PetscFunctionBegin;
2048   if (nghosts) { *nghosts = B->nbs;}
2049   if (ghosts) {*ghosts = baij->garray;}
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2057 /*
2058     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2059 */
2060 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2061 {
2062   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2063   PetscErrorCode         ierr;
2064   PetscMPIInt            size,*ncolsonproc,*disp,nn;
2065   PetscInt               bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2066   const PetscInt         *is;
2067   PetscInt               nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2068   PetscInt               *rowhit,M,cstart,cend,colb;
2069   PetscInt               *columnsforrow,l;
2070   IS                     *isa;
2071   PetscTruth             done,flg;
2072   ISLocalToGlobalMapping map = mat->bmapping;
2073   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype = c->ctype;
2074 
2075   PetscFunctionBegin;
2076   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2077   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2078 
2079   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2080   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2081   M                = mat->rmap->n/bs;
2082   cstart           = mat->cmap->rstart/bs;
2083   cend             = mat->cmap->rend/bs;
2084   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2085   c->N             = mat->cmap->N/bs;
2086   c->m             = mat->rmap->n/bs;
2087   c->rstart        = mat->rmap->rstart/bs;
2088 
2089   c->ncolors       = nis;
2090   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2091   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2092   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2093   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2094   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2095   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2096 
2097   /* Allow access to data structures of local part of matrix */
2098   if (!baij->colmap) {
2099     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2100   }
2101   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2102   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2103 
2104   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2105   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2106 
2107   for (i=0; i<nis; i++) {
2108     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2109     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2110     c->ncolumns[i] = n;
2111     if (n) {
2112       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2113       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2114       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2115     } else {
2116       c->columns[i]  = 0;
2117     }
2118 
2119     if (ctype == IS_COLORING_GLOBAL){
2120       /* Determine the total (parallel) number of columns of this color */
2121       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2122       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2123 
2124       nn   = PetscMPIIntCast(n);
2125       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2126       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2127       if (!nctot) {
2128         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2129       }
2130 
2131       disp[0] = 0;
2132       for (j=1; j<size; j++) {
2133         disp[j] = disp[j-1] + ncolsonproc[j-1];
2134       }
2135 
2136       /* Get complete list of columns for color on each processor */
2137       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2138       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2139       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2140     } else if (ctype == IS_COLORING_GHOSTED){
2141       /* Determine local number of columns of this color on this process, including ghost points */
2142       nctot = n;
2143       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2145     } else {
2146       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2147     }
2148 
2149     /*
2150        Mark all rows affected by these columns
2151     */
2152     /* Temporary option to allow for debugging/testing */
2153     flg  = PETSC_FALSE;
2154     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2155     if (!flg) {/*-----------------------------------------------------------------------------*/
2156       /* crude, fast version */
2157       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2158       /* loop over columns */
2159       for (j=0; j<nctot; j++) {
2160         if (ctype == IS_COLORING_GHOSTED) {
2161           col = ltog[cols[j]];
2162         } else {
2163           col  = cols[j];
2164         }
2165         if (col >= cstart && col < cend) {
2166           /* column is in diagonal block of matrix */
2167           rows = A_cj + A_ci[col-cstart];
2168           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2169         } else {
2170 #if defined (PETSC_USE_CTABLE)
2171           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2172           colb--;
2173 #else
2174           colb = baij->colmap[col] - 1;
2175 #endif
2176           if (colb == -1) {
2177             m = 0;
2178           } else {
2179             colb = colb/bs;
2180             rows = B_cj + B_ci[colb];
2181             m    = B_ci[colb+1] - B_ci[colb];
2182           }
2183         }
2184         /* loop over the rows hit by this column, marking them in rowhit */
2185         for (k=0; k<m; k++) {
2186           rowhit[*rows++] = col + 1;
2187         }
2188       }
2189 
2190       /* count the number of hits */
2191       nrows = 0;
2192       for (j=0; j<M; j++) {
2193         if (rowhit[j]) nrows++;
2194       }
2195       c->nrows[i]         = nrows;
2196       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2197       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2198       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2199       nrows = 0;
2200       for (j=0; j<M; j++) {
2201         if (rowhit[j]) {
2202           c->rows[i][nrows]           = j;
2203           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2204           nrows++;
2205         }
2206       }
2207     } else {/*-------------------------------------------------------------------------------*/
2208       /* slow version, using rowhit as a linked list */
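      /* rowhit[] doubles as a sorted singly linked list of the rows hit so far:
         rowhit[M] is the sentinel head and rowhit[r] holds the next hit row
         after r, so each insertion keeps the list ordered */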
2209       PetscInt currentcol,fm,mfm;
2210       rowhit[M] = M;
2211       nrows     = 0;
2212       /* loop over columns */
2213       for (j=0; j<nctot; j++) {
2214         if (ctype == IS_COLORING_GHOSTED) {
2215           col = ltog[cols[j]];
2216         } else {
2217           col  = cols[j];
2218         }
2219         if (col >= cstart && col < cend) {
2220           /* column is in diagonal block of matrix */
2221           rows = A_cj + A_ci[col-cstart];
2222           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2223         } else {
2224 #if defined (PETSC_USE_CTABLE)
2225           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2226           colb --;
2227 #else
2228           colb = baij->colmap[col] - 1;
2229 #endif
2230           if (colb == -1) {
2231             m = 0;
2232           } else {
2233             colb = colb/bs;
2234             rows = B_cj + B_ci[colb];
2235             m    = B_ci[colb+1] - B_ci[colb];
2236           }
2237         }
2238 
2239         /* insert the rows hit by this column into the sorted linked list */
2240         fm    = M; /* fm points to first entry in linked list */
2241         for (k=0; k<m; k++) {
2242           currentcol = *rows++;
2243           /* is it already in the list? */
2244           do {
2245             mfm  = fm;
2246             fm   = rowhit[fm];
2247           } while (fm < currentcol);
2248           /* not in list so add it */
2249           if (fm != currentcol) {
2250             nrows++;
2251             columnsforrow[currentcol] = col;
2252             /* next three lines insert new entry into linked list */
2253             rowhit[mfm]               = currentcol;
2254             rowhit[currentcol]        = fm;
2255             fm                        = currentcol;
2256             /* fm points to present position in list since we know the columns are sorted */
2257           } else {
2258             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2259           }
2260         }
2261       }
2262       c->nrows[i]         = nrows;
2263       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2264       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2265       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2266       /* now store the linked list of rows into c->rows[i] */
2267       nrows = 0;
2268       fm    = rowhit[M];
2269       do {
2270         c->rows[i][nrows]            = fm;
2271         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2272         fm                           = rowhit[fm];
2273       } while (fm < M);
2274     } /* ---------------------------------------------------------------------------------------*/
2275     ierr = PetscFree(cols);CHKERRQ(ierr);
2276   }
2277 
2278   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2279   /*
2280        vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
2281   */
2282   if (ctype == IS_COLORING_GLOBAL) {
2283     PetscInt *garray;
2284     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2285     for (i=0; i<baij->B->cmap->n/bs; i++) {
2286       for (j=0; j<bs; j++) {
2287         garray[i*bs+j] = bs*baij->garray[i]+j;
2288       }
2289     }
2290     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2291     ierr = PetscFree(garray);CHKERRQ(ierr);
2292     CHKMEMQ;
2293     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2294     for (k=0; k<c->ncolors; k++) {
2295       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2296       for (l=0; l<c->nrows[k]; l++) {
2297         col = c->columnsforrow[k][l];
2298         if (col >= cstart && col < cend) {
2299           /* column is in diagonal block of matrix */
2300           colb = col - cstart;
2301         } else {
2302           /* column  is in "off-processor" part */
2303 #if defined (PETSC_USE_CTABLE)
2304           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2305           colb --;
2306 #else
2307           colb = baij->colmap[col] - 1;
2308 #endif
2309           colb = colb/bs;
2310           colb += cend - cstart;
2311         }
2312         c->vscaleforrow[k][l] = colb;
2313       }
2314     }
2315   } else if (ctype == IS_COLORING_GHOSTED) {
2316     /* Get gtol mapping */
2317     PetscInt N = mat->cmap->N, *gtol;
2318     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2319     for (i=0; i<N; i++) gtol[i] = -1;
2320     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2321 
2322     c->vscale = 0; /* will be created in MatFDColoringApply() */
2323     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2324     for (k=0; k<c->ncolors; k++) {
2325       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2326       for (l=0; l<c->nrows[k]; l++) {
2327         col = c->columnsforrow[k][l];      /* global column index */
2328         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2329       }
2330     }
2331     ierr = PetscFree(gtol);CHKERRQ(ierr);
2332   }
2333   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2334 
2335   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2336   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2337   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2338   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2339   CHKMEMQ;
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
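/*
    Gathers the block nonzero structure of the entire parallel matrix onto every
   process as a sequential AIJ matrix with one row/column per block; the
   numerical values are not set.
*/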
2345 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2346 {
2347   Mat            B;
2348   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2349   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2350   Mat_SeqAIJ     *b;
2351   PetscErrorCode ierr;
2352   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2353   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2354   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2355 
2356   PetscFunctionBegin;
2357   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2358   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2359 
2360   /* ----------------------------------------------------------------
2361      Tell every processor the number of nonzeros per row
2362   */
2363   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2364   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2365     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2366   }
2367   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2368   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2369   displs     = recvcounts + size;
2370   for (i=0; i<size; i++) {
2371     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2372     displs[i]     = A->rmap->range[i]/bs;
2373   }
2374 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2375   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2376 #else
2377   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2378 #endif
2379   /* ---------------------------------------------------------------
2380      Create the sequential matrix of the same type as the local block diagonal
2381   */
2382   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2383   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2384   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2385   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2386   b = (Mat_SeqAIJ *)B->data;
2387 
2388   /*--------------------------------------------------------------------
2389     Copy my part of matrix column indices over
2390   */
2391   sendcount  = ad->nz + bd->nz;
2392   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2393   a_jsendbuf = ad->j;
2394   b_jsendbuf = bd->j;
2395   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2396   cnt        = 0;
2397   for (i=0; i<n; i++) {
2398 
2399     /* put in lower diagonal portion */
2400     m = bd->i[i+1] - bd->i[i];
2401     while (m > 0) {
2402       /* stop once the global block column passes the diagonal; those entries go in the upper portion below */
2403       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2404       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2405       m--;
2406     }
2407 
2408     /* put in diagonal portion */
2409     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2410       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2411     }
2412 
2413     /* put in upper diagonal portion */
2414     while (m-- > 0) {
2415       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2416     }
2417   }
2418   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2419 
2420   /*--------------------------------------------------------------------
2421     Gather all column indices to all processors
2422   */
2423   for (i=0; i<size; i++) {
2424     recvcounts[i] = 0;
2425     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2426       recvcounts[i] += lens[j];
2427     }
2428   }
2429   displs[0]  = 0;
2430   for (i=1; i<size; i++) {
2431     displs[i] = displs[i-1] + recvcounts[i-1];
2432   }
2433 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2434   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2435 #else
2436   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2437 #endif
2438   /*--------------------------------------------------------------------
2439     Assemble the matrix into usable form (note: numerical values are not yet set)
2440   */
2441   /* set the b->ilen (length of each row) values */
2442   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2443   /* set the b->i indices */
2444   b->i[0] = 0;
2445   for (i=1; i<=A->rmap->N/bs; i++) {
2446     b->i[i] = b->i[i-1] + lens[i-1];
2447   }
2448   ierr = PetscFree(lens);CHKERRQ(ierr);
2449   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2450   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2451   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2452 
2453   if (A->symmetric){
2454     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2455   } else if (A->hermitian) {
2456     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2457   } else if (A->structurally_symmetric) {
2458     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2459   }
2460   *newmat = B;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSOR_MPIBAIJ"
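/*
    Only the "local" SOR variants are supported: each outer iteration refreshes
   the ghost values of xx, moves the off-diagonal coupling B*x onto the
   right-hand side, and then runs the requested sweep on the local diagonal
   block (processor-block Jacobi with SOR inside each block).
*/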
2466 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2467 {
2468   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2469   PetscErrorCode ierr;
2470   Vec            bb1 = 0;
2471 
2472   PetscFunctionBegin;
2473   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2474     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2475   }
2476 
2477   if (flag == SOR_APPLY_UPPER) {
2478     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2479     PetscFunctionReturn(0);
2480   }
2481 
2482   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2483     if (flag & SOR_ZERO_INITIAL_GUESS) {
2484       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2485       its--;
2486     }
2487 
2488     while (its--) {
2489       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2490       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2491 
2492       /* update rhs: bb1 = bb - B*x */
2493       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2494       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2495 
2496       /* local sweep */
2497       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2498     }
2499   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2500     if (flag & SOR_ZERO_INITIAL_GUESS) {
2501       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2502       its--;
2503     }
2504     while (its--) {
2505       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2506       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2507 
2508       /* update rhs: bb1 = bb - B*x */
2509       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2510       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2511 
2512       /* local sweep */
2513       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2514     }
2515   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2516     if (flag & SOR_ZERO_INITIAL_GUESS) {
2517       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2518       its--;
2519     }
2520     while (its--) {
2521       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2522       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2523 
2524       /* update rhs: bb1 = bb - B*x */
2525       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2526       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2527 
2528       /* local sweep */
2529       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2530     }
2531   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2532 
2533   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2538 
2540 /* -------------------------------------------------------------------*/
2541 static struct _MatOps MatOps_Values = {
2542        MatSetValues_MPIBAIJ,
2543        MatGetRow_MPIBAIJ,
2544        MatRestoreRow_MPIBAIJ,
2545        MatMult_MPIBAIJ,
2546 /* 4*/ MatMultAdd_MPIBAIJ,
2547        MatMultTranspose_MPIBAIJ,
2548        MatMultTransposeAdd_MPIBAIJ,
2549        0,
2550        0,
2551        0,
2552 /*10*/ 0,
2553        0,
2554        0,
2555        MatSOR_MPIBAIJ,
2556        MatTranspose_MPIBAIJ,
2557 /*15*/ MatGetInfo_MPIBAIJ,
2558        MatEqual_MPIBAIJ,
2559        MatGetDiagonal_MPIBAIJ,
2560        MatDiagonalScale_MPIBAIJ,
2561        MatNorm_MPIBAIJ,
2562 /*20*/ MatAssemblyBegin_MPIBAIJ,
2563        MatAssemblyEnd_MPIBAIJ,
2564        MatSetOption_MPIBAIJ,
2565        MatZeroEntries_MPIBAIJ,
2566 /*24*/ MatZeroRows_MPIBAIJ,
2567        0,
2568        0,
2569        0,
2570        0,
2571 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2572        0,
2573        0,
2574        0,
2575        0,
2576 /*34*/ MatDuplicate_MPIBAIJ,
2577        0,
2578        0,
2579        0,
2580        0,
2581 /*39*/ MatAXPY_MPIBAIJ,
2582        MatGetSubMatrices_MPIBAIJ,
2583        MatIncreaseOverlap_MPIBAIJ,
2584        MatGetValues_MPIBAIJ,
2585        MatCopy_MPIBAIJ,
2586 /*44*/ 0,
2587        MatScale_MPIBAIJ,
2588        0,
2589        0,
2590        0,
2591 /*49*/ MatSetBlockSize_MPIBAIJ,
2592        0,
2593        0,
2594        0,
2595        0,
2596 /*54*/ MatFDColoringCreate_MPIBAIJ,
2597        0,
2598        MatSetUnfactored_MPIBAIJ,
2599        MatPermute_MPIBAIJ,
2600        MatSetValuesBlocked_MPIBAIJ,
2601 /*59*/ MatGetSubMatrix_MPIBAIJ,
2602        MatDestroy_MPIBAIJ,
2603        MatView_MPIBAIJ,
2604        0,
2605        0,
2606 /*64*/ 0,
2607        0,
2608        0,
2609        0,
2610        0,
2611 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2612        0,
2613        0,
2614        0,
2615        0,
2616 /*74*/ 0,
2617        MatFDColoringApply_BAIJ,
2618        0,
2619        0,
2620        0,
2621 /*79*/ 0,
2622        0,
2623        0,
2624        0,
2625        MatLoad_MPIBAIJ,
2626 /*84*/ 0,
2627        0,
2628        0,
2629        0,
2630        0,
2631 /*89*/ 0,
2632        0,
2633        0,
2634        0,
2635        0,
2636 /*94*/ 0,
2637        0,
2638        0,
2639        0,
2640        0,
2641 /*99*/ 0,
2642        0,
2643        0,
2644        0,
2645        0,
2646 /*104*/0,
2647        MatRealPart_MPIBAIJ,
2648        MatImaginaryPart_MPIBAIJ,
2649        0,
2650        0,
2651 /*109*/0,
2652        0,
2653        0,
2654        0,
2655        0,
2656 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2657        0,
2658        MatGetGhosts_MPIBAIJ
2659 };
2660 
2661 EXTERN_C_BEGIN
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
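/*
    Hands back the local diagonal block; *iscopy is set to PETSC_FALSE because
   the caller receives a reference to baij->A, not a copy.
*/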
2664 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2665 {
2666   PetscFunctionBegin;
2667   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2668   *iscopy = PETSC_FALSE;
2669   PetscFunctionReturn(0);
2670 }
2671 EXTERN_C_END
2672 
2673 EXTERN_C_BEGIN
2674 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2675 EXTERN_C_END
2676 
2677 EXTERN_C_BEGIN
2678 #undef __FUNCT__
2679 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2680 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2681 {
2682   PetscInt       m,rstart,cstart,cend;
2683   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2684   const PetscInt *JJ=0;
2685   PetscScalar    *values=0;
2686   PetscErrorCode ierr;
2687 
2688   PetscFunctionBegin;
2689 
2690   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2691   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2692   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2693   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2694   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2695   m      = B->rmap->n/bs;
2696   rstart = B->rmap->rstart/bs;
2697   cstart = B->cmap->rstart/bs;
2698   cend   = B->cmap->rend/bs;
2699 
2700   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2701   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2702   for (i=0; i<m; i++) {
2703     nz = ii[i+1] - ii[i];
2704     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2705     nz_max = PetscMax(nz_max,nz);
2706     JJ  = jj + ii[i];
2707     for (j=0; j<nz; j++) {
2708       if (*JJ >= cstart) break;
2709       JJ++;
2710     }
2711     d = 0;
2712     for (; j<nz; j++) {
2713       if (*JJ++ >= cend) break;
2714       d++;
2715     }
2716     d_nnz[i] = d;
2717     o_nnz[i] = nz - d;
2718   }
2719   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2720   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2721 
2722   values = (PetscScalar*)V;
2723   if (!values) {
2724     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2725     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2726   }
2727   for (i=0; i<m; i++) {
2728     PetscInt          row    = i + rstart;
2729     PetscInt          ncols  = ii[i+1] - ii[i];
2730     const PetscInt    *icols = jj + ii[i];
2731     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2732     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2733   }
2734 
2735   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2736   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2737   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2738 
2739   PetscFunctionReturn(0);
2740 }
2741 EXTERN_C_END
2742 
2743 #undef __FUNCT__
2744 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2745 /*@C
2746    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2747    (the default parallel PETSc format).
2748 
2749    Collective on MPI_Comm
2750 
2751    Input Parameters:
2752 +  A - the matrix
2753 .  i - the indices into j for the start of each local row (starts with zero)
2754 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2755 -  v - optional values in the matrix
2756 
2757    Level: developer
2758 
1759 .keywords: matrix, baij, block, compressed row, sparse, parallel
1760 
1761 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2762 @*/
2763 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2764 {
2765   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2766 
2767   PetscFunctionBegin;
2768   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2769   if (f) {
2770     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2771   }
2772   PetscFunctionReturn(0);
2773 }
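/*
   A minimal usage sketch, kept out of compilation; the function name and data
   below are hypothetical.  It builds an MPIBAIJ matrix from block-CSR arrays
   with block size 2: i[] holds block-row offsets into j[], j[] holds sorted
   global block-column indices, and v[] optionally supplies bs*bs values per
   block.  Note that MatMPIBAIJSetPreallocationCSR() inserts the values and
   assembles the matrix itself.
*/
#if 0
static PetscErrorCode ExampleSetPreallocationCSR(MPI_Comm comm)
{
  Mat               B;
  PetscErrorCode    ierr;
  const PetscInt    bs  = 2;
  const PetscInt    i[] = {0,1,2};              /* two local block rows, one block each */
  const PetscInt    j[] = {0,1};                /* global block-column indices, sorted per row */
  const PetscScalar v[] = {1,0,0,1, 1,0,0,1};   /* bs*bs values for each block listed in j[] */

  PetscFunctionBegin;
  ierr = MatCreate(comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(B,bs,i,j,v);CHKERRQ(ierr);
  /* ... use B ... */
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif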
2774 
2775 EXTERN_C_BEGIN
2776 #undef __FUNCT__
2777 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2778 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2779 {
2780   Mat_MPIBAIJ    *b;
2781   PetscErrorCode ierr;
2782   PetscInt       i, newbs = PetscAbs(bs);
2783 
2784   PetscFunctionBegin;
2785   if (bs < 0) {
2786     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2787       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2788     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2789     bs   = PetscAbs(bs);
2790   }
2791   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2792   bs = newbs;
2793 
2794 
2795   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2796   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2797   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2798   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2799   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2800 
2801   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2802   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2803   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2804   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2805 
2806   if (d_nnz) {
2807     for (i=0; i<B->rmap->n/bs; i++) {
2808       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2809     }
2810   }
2811   if (o_nnz) {
2812     for (i=0; i<B->rmap->n/bs; i++) {
2813       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2814     }
2815   }
2816 
2817   b = (Mat_MPIBAIJ*)B->data;
2818   b->bs2 = bs*bs;
2819   b->mbs = B->rmap->n/bs;
2820   b->nbs = B->cmap->n/bs;
2821   b->Mbs = B->rmap->N/bs;
2822   b->Nbs = B->cmap->N/bs;
2823 
2824   for (i=0; i<=b->size; i++) {
2825     b->rangebs[i] = B->rmap->range[i]/bs;
2826   }
2827   b->rstartbs = B->rmap->rstart/bs;
2828   b->rendbs   = B->rmap->rend/bs;
2829   b->cstartbs = B->cmap->rstart/bs;
2830   b->cendbs   = B->cmap->rend/bs;
2831 
2832   if (!B->preallocated) {
2833     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2834     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2835     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2836     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2837     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2838     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2839     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2840     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2841     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2842   }
2843 
2844   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2845   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2846   B->preallocated = PETSC_TRUE;
2847   PetscFunctionReturn(0);
2848 }
2849 EXTERN_C_END
2850 
2851 EXTERN_C_BEGIN
2852 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2853 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2854 EXTERN_C_END
2855 
2856 
2857 EXTERN_C_BEGIN
2858 #undef __FUNCT__
2859 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2860 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2861 {
2862   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2863   PetscErrorCode ierr;
2864   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2865   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2866   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2867 
2868   PetscFunctionBegin;
2869   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2870   ii[0] = 0;
2871   CHKMEMQ;
2872   for (i=0; i<M; i++) {
2873     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2874     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2875     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2876     /* remove one from the count if the row contains a diagonal entry */
2877     for (j=id[i]; j<id[i+1]; j++) {
2878       if (jd[j] == i) {ii[i+1]--;break;}
2879     }
2880     CHKMEMQ;
2881   }
2882   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2883   cnt = 0;
2884   for (i=0; i<M; i++) {
2885     for (j=io[i]; j<io[i+1]; j++) {
2886       if (garray[jo[j]] > rstart) break;
2887       jj[cnt++] = garray[jo[j]];
2888       CHKMEMQ;
2889     }
2890     for (k=id[i]; k<id[i+1]; k++) {
2891       if (jd[k] != i) {
2892         jj[cnt++] = rstart + jd[k];
2893         CHKMEMQ;
2894       }
2895     }
2896     for (;j<io[i+1]; j++) {
2897       jj[cnt++] = garray[jo[j]];
2898       CHKMEMQ;
2899     }
2900   }
2901   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2902   PetscFunctionReturn(0);
2903 }
2904 EXTERN_C_END
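/*
   Sketch, kept out of compilation; the function name is hypothetical.  The
   conversion above is reached through the generic MatConvert() interface,
   via the "MatConvert_mpibaij_mpiadj_C" hook composed in MatCreate_MPIBAIJ()
   below.  B is assumed to be an assembled MPIBAIJ matrix.
*/
#if 0
static PetscErrorCode ExampleConvertToAdj(Mat B)
{
  Mat            adj;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConvert(B,MATMPIADJ,MAT_INITIAL_MATRIX,&adj);CHKERRQ(ierr);
  /* ... e.g. feed the block adjacency graph to a partitioner ... */
  ierr = MatDestroy(adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif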
2905 
2906 EXTERN_C_BEGIN
2907 #if defined(PETSC_HAVE_MUMPS)
2908 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2909 #endif
2910 EXTERN_C_END
2911 
2912 /*MC
2913    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2914 
2915    Options Database Keys:
2916 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2917 . -mat_block_size <bs> - set the blocksize used to store the matrix
2918 - -mat_use_hash_table <fact> - use a hash table when constructing the matrix to save memory; <fact> is the factor used to size the hash table (see MatMPIBAIJSetHashTableFactor())
2919 
2920   Level: beginner
2921 
2922 .seealso: MatCreateMPIBAIJ
2923 M*/
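/*
   For example, a run might select and tune this type entirely from the
   options database (the values here are illustrative):

       ./app -mat_type mpibaij -mat_block_size 3 -mat_use_hash_table 1.6
*/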
2924 
2925 EXTERN_C_BEGIN
2926 #undef __FUNCT__
2927 #define __FUNCT__ "MatCreate_MPIBAIJ"
2928 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2929 {
2930   Mat_MPIBAIJ    *b;
2931   PetscErrorCode ierr;
2932   PetscTruth     flg;
2933 
2934   PetscFunctionBegin;
2935   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2936   B->data = (void*)b;
2937 
2938 
2939   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2940   B->mapping    = 0;
2941   B->assembled  = PETSC_FALSE;
2942 
2943   B->insertmode = NOT_SET_VALUES;
2944   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2945   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2946 
2947   /* build local table of row and column ownerships */
2948   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2949 
2950   /* build cache for off array entries formed */
2951   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2952   b->donotstash  = PETSC_FALSE;
2953   b->colmap      = PETSC_NULL;
2954   b->garray      = PETSC_NULL;
2955   b->roworiented = PETSC_TRUE;
2956 
2957   /* stuff used in block assembly */
2958   b->barray       = 0;
2959 
2960   /* stuff used for matrix vector multiply */
2961   b->lvec         = 0;
2962   b->Mvctx        = 0;
2963 
2964   /* stuff for MatGetRow() */
2965   b->rowindices   = 0;
2966   b->rowvalues    = 0;
2967   b->getrowactive = PETSC_FALSE;
2968 
2969   /* hash table stuff */
2970   b->ht           = 0;
2971   b->hd           = 0;
2972   b->ht_size      = 0;
2973   b->ht_flag      = PETSC_FALSE;
2974   b->ht_fact      = 0;
2975   b->ht_total_ct  = 0;
2976   b->ht_insert_ct = 0;
2977 
2978   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2979     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2980     if (flg) {
2981       PetscReal fact = 1.39;
2982       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2983       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2984       if (fact <= 1.0) fact = 1.39;
2985       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2986       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2987     }
2988   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2989 
2990 #if defined(PETSC_HAVE_MUMPS)
2991   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
2992 #endif
2993   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
2994                                      "MatConvert_MPIBAIJ_MPIAdj",
2995                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2996   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2997                                      "MatStoreValues_MPIBAIJ",
2998                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2999   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3000                                      "MatRetrieveValues_MPIBAIJ",
3001                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3002   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3003                                      "MatGetDiagonalBlock_MPIBAIJ",
3004                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3005   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3006                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3007                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3008   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3009                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3010                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3011   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3012                                      "MatDiagonalScaleLocal_MPIBAIJ",
3013                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3014   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3015                                      "MatSetHashTableFactor_MPIBAIJ",
3016                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3017   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3018   PetscFunctionReturn(0);
3019 }
3020 EXTERN_C_END
3021 
3022 /*MC
3023    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3024 
3025    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3026    and MATMPIBAIJ otherwise.
3027 
3028    Options Database Keys:
3029 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3030 
3031   Level: beginner
3032 
3033 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3034 M*/
3035 
3036 EXTERN_C_BEGIN
3037 #undef __FUNCT__
3038 #define __FUNCT__ "MatCreate_BAIJ"
3039 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3040 {
3041   PetscErrorCode ierr;
3042   PetscMPIInt    size;
3043 
3044   PetscFunctionBegin;
3045   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3046   if (size == 1) {
3047     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3048   } else {
3049     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3050   }
3051   PetscFunctionReturn(0);
3052 }
3053 EXTERN_C_END
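/*
   Sketch, kept out of compilation; the function name is hypothetical.
   Because of the dispatch above, requesting MATBAIJ yields MATSEQBAIJ on one
   process and MATMPIBAIJ otherwise, so the same code runs sequentially and
   in parallel.
*/
#if 0
static PetscErrorCode ExampleCreateBAIJ(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATBAIJ);CHKERRQ(ierr);  /* resolves by communicator size */
  PetscFunctionReturn(0);
}
#endif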
3054 
3055 #undef __FUNCT__
3056 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3057 /*@C
3058    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3059    (block compressed row).  For good matrix assembly performance
3060    the user should preallocate the matrix storage by setting the parameters
3061    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3062    performance can be increased by more than a factor of 50.
3063 
3064    Collective on Mat
3065 
3066    Input Parameters:
3067 +  A - the matrix
3068 .  bs   - size of block
3069 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3070            submatrix  (same for all local rows)
3071 .  d_nnz - array containing the number of block nonzeros in the various block rows
3072            of the diagonal portion of the local submatrix (possibly different for each block
3073            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3074 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3075            submatrix (same for all local rows).
3076 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3077            off-diagonal portion of the local submatrix (possibly different for
3078            each block row) or PETSC_NULL.
3079 
3080    If the *_nnz parameter is given then the *_nz parameter is ignored
3081 
3082    Options Database Keys:
3083 +   -mat_block_size - size of the blocks to use
3084 -   -mat_use_hash_table <fact> - use a hash table when constructing the matrix to save memory; <fact> is the factor used to size the hash table (see MatMPIBAIJSetHashTableFactor())
3085 
3086    Notes:
3087    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3088    then it must be used on all processors that share the object for that argument.
3089 
3090    Storage Information:
3091    For a square global matrix we define each processor's diagonal portion
3092    to be its local rows and the corresponding columns (a square submatrix);
3093    each processor's off-diagonal portion encompasses the remainder of the
3094    local matrix (a rectangular submatrix).
3095 
3096    The user can specify preallocated storage for the diagonal part of
3097    the local submatrix with either d_nz or d_nnz (not both).  Set
3098    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3099    memory allocation.  Likewise, specify preallocated storage for the
3100    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3101 
3102    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3103    the figure below we depict these three local rows and all columns (0-11).
3104 
3105 .vb
3106            0 1 2 3 4 5 6 7 8 9 10 11
3107           -------------------
3108    row 3  |  o o o d d d o o o o o o
3109    row 4  |  o o o d d d o o o o o o
3110    row 5  |  o o o d d d o o o o o o
3111           -------------------
3112 .ve
3113 
3114    Thus, any entries in the d locations are stored in the d (diagonal)
3115    submatrix, and any entries in the o locations are stored in the
3116    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3117    stored simply in the MATSEQBAIJ format for compressed row storage.
3118 
3119    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3120    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3121    In general, for PDE problems in which most nonzeros are near the diagonal,
3122    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3123    or you will get TERRIBLE performance; see the users' manual chapter on
3124    matrices.
3125 
3126    You can call MatGetInfo() to get information on how effective the preallocation was;
3127    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3128    You can also run with the option -info and look for messages with the string
3129    malloc in them to see if additional memory allocation was needed.
3130 
3131    Level: intermediate
3132 
3133 .keywords: matrix, block, aij, compressed row, sparse, parallel
3134 
3135 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3136 @*/
3137 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3138 {
3139   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3140 
3141   PetscFunctionBegin;
3142   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3143   if (f) {
3144     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3145   }
3146   PetscFunctionReturn(0);
3147 }
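/*
   A preallocation sketch, kept out of compilation, matching the storage
   figure in the manual page above: with bs=1, a process owning rows 3-5 has
   3 diagonal-portion entries and 9 off-diagonal entries per row.  The
   function name and counts are illustrative only.
*/
#if 0
static PetscErrorCode ExamplePreallocate(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       d_nnz[3] = {3,3,3},o_nnz[3] = {9,9,9};

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,3,3,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(*A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  /* with uniform rows this is equivalent to MatMPIBAIJSetPreallocation(*A,1,3,PETSC_NULL,9,PETSC_NULL) */
  PetscFunctionReturn(0);
}
#endif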
3148 
3149 #undef __FUNCT__
3150 #define __FUNCT__ "MatCreateMPIBAIJ"
3151 /*@C
3152    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3153    (block compressed row).  For good matrix assembly performance
3154    the user should preallocate the matrix storage by setting the parameters
3155    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3156    performance can be increased by more than a factor of 50.
3157 
3158    Collective on MPI_Comm
3159 
3160    Input Parameters:
3161 +  comm - MPI communicator
3162 .  bs   - size of block
3163 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3164            This value should be the same as the local size used in creating the
3165            y vector for the matrix-vector product y = Ax.
3166 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
3167            This value should be the same as the local size used in creating the
3168            x vector for the matrix-vector product y = Ax.
3169 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3170 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3171 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3172            submatrix  (same for all local rows)
3173 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3174            of the diagonal portion of the local submatrix (possibly different for each block
3175            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3176 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3177            submatrix (same for all local rows).
3178 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3179            off-diagonal portion of the local submatrix (possibly different for
3180            each block row) or PETSC_NULL.
3181 
3182    Output Parameter:
3183 .  A - the matrix
3184 
3185    Options Database Keys:
3186 +   -mat_block_size - size of the blocks to use
3187 -   -mat_use_hash_table <fact> - use a hash table when constructing the matrix to save memory; <fact> is the factor used to size the hash table (see MatMPIBAIJSetHashTableFactor())
3188 
3189    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3190    MatXXXXSetPreallocation() paradigm instead of calling this routine directly; a sketch follows the routine below.
3191    [MatXXXXSetPreallocation() is, for example, MatMPIBAIJSetPreallocation()]
3192 
3193    Notes:
3194    If the *_nnz parameter is given then the *_nz parameter is ignored
3195 
3196    A nonzero block is any block that has 1 or more nonzeros in it
3197 
3198    The user MUST specify either the local or global matrix dimensions
3199    (possibly both).
3200 
3201    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3202    then it must be used on all processors that share the object for that argument.
3203 
3204    Storage Information:
3205    For a square global matrix we define each processor's diagonal portion
3206    to be its local rows and the corresponding columns (a square submatrix);
3207    each processor's off-diagonal portion encompasses the remainder of the
3208    local matrix (a rectangular submatrix).
3209 
3210    The user can specify preallocated storage for the diagonal part of
3211    the local submatrix with either d_nz or d_nnz (not both).  Set
3212    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3213    memory allocation.  Likewise, specify preallocated storage for the
3214    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3215 
3216    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3217    the figure below we depict these three local rows and all columns (0-11).
3218 
3219 .vb
3220            0 1 2 3 4 5 6 7 8 9 10 11
3221           -------------------
3222    row 3  |  o o o d d d o o o o o o
3223    row 4  |  o o o d d d o o o o o o
3224    row 5  |  o o o d d d o o o o o o
3225           -------------------
3226 .ve
3227 
3228    Thus, any entries in the d locations are stored in the d (diagonal)
3229    submatrix, and any entries in the o locations are stored in the
3230    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3231    stored simply in the MATSEQBAIJ format for compressed row storage.
3232 
3233    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3234    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3235    In general, for PDE problems in which most nonzeros are near the diagonal,
3236    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3237    or you will get TERRIBLE performance; see the users' manual chapter on
3238    matrices.
3239 
3240    Level: intermediate
3241 
3242 .keywords: matrix, block, aij, compressed row, sparse, parallel
3243 
3244 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3245 @*/
3246 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3247 {
3248   PetscErrorCode ierr;
3249   PetscMPIInt    size;
3250 
3251   PetscFunctionBegin;
3252   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3253   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3255   if (size > 1) {
3256     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3257     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3258   } else {
3259     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3260     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3261   }
3262   PetscFunctionReturn(0);
3263 }
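/*
   The recommended paradigm from the manual page above, sketched here and
   kept out of compilation; the function name is hypothetical, and the d_nz/o_nz
   values 5 and 2 mirror the defaults used in MatMPIBAIJSetPreallocation_MPIBAIJ().
*/
#if 0
static PetscErrorCode ExampleCreateParadigm(MPI_Comm comm,PetscInt bs,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);  /* picks up runtime options from the database */
  ierr = MatMPIBAIJSetPreallocation(*A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif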
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3267 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3268 {
3269   Mat            mat;
3270   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3271   PetscErrorCode ierr;
3272   PetscInt       len=0;
3273 
3274   PetscFunctionBegin;
3275   *newmat       = 0;
3276   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3277   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3278   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3279   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3280 
3281   mat->factortype   = matin->factortype;
3282   mat->preallocated = PETSC_TRUE;
3283   mat->assembled    = PETSC_TRUE;
3284   mat->insertmode   = NOT_SET_VALUES;
3285 
3286   a      = (Mat_MPIBAIJ*)mat->data;
3287   mat->rmap->bs  = matin->rmap->bs;
3288   a->bs2   = oldmat->bs2;
3289   a->mbs   = oldmat->mbs;
3290   a->nbs   = oldmat->nbs;
3291   a->Mbs   = oldmat->Mbs;
3292   a->Nbs   = oldmat->Nbs;
3293 
3294   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3295   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3296 
3297   a->size         = oldmat->size;
3298   a->rank         = oldmat->rank;
3299   a->donotstash   = oldmat->donotstash;
3300   a->roworiented  = oldmat->roworiented;
3301   a->rowindices   = 0;
3302   a->rowvalues    = 0;
3303   a->getrowactive = PETSC_FALSE;
3304   a->barray       = 0;
3305   a->rstartbs     = oldmat->rstartbs;
3306   a->rendbs       = oldmat->rendbs;
3307   a->cstartbs     = oldmat->cstartbs;
3308   a->cendbs       = oldmat->cendbs;
3309 
3310   /* hash table stuff */
3311   a->ht           = 0;
3312   a->hd           = 0;
3313   a->ht_size      = 0;
3314   a->ht_flag      = oldmat->ht_flag;
3315   a->ht_fact      = oldmat->ht_fact;
3316   a->ht_total_ct  = 0;
3317   a->ht_insert_ct = 0;
3318 
3319   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3320   if (oldmat->colmap) {
3321 #if defined (PETSC_USE_CTABLE)
3322     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3323 #else
3324     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3325     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3326     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3327 #endif
3328   } else a->colmap = 0;
3329 
3330   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3331     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3332     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3333     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3334   } else a->garray = 0;
3335 
3336   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3337   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3338   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3339   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3340   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3341 
3342   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3343   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3344   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3345   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3346   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3347   *newmat = mat;
3348 
3349   PetscFunctionReturn(0);
3350 }
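/*
   Sketch, kept out of compilation: the routine above is reached through the
   generic MatDuplicate() interface; A is assumed to be an assembled MPIBAIJ
   matrix and ierr a PetscErrorCode.
*/
#if 0
  Mat Acopy;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,&Acopy);CHKERRQ(ierr);  /* or MAT_DO_NOT_COPY_VALUES */
  ierr = MatDestroy(Acopy);CHKERRQ(ierr);
#endif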
3351 
3352 #undef __FUNCT__
3353 #define __FUNCT__ "MatLoad_MPIBAIJ"
3354 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3355 {
3356   Mat            A;
3357   PetscErrorCode ierr;
3358   int            fd;
3359   PetscInt       i,nz,j,rstart,rend;
3360   PetscScalar    *vals,*buf;
3361   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3362   MPI_Status     status;
3363   PetscMPIInt    rank,size,maxnz;
3364   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3365   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3366   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3367   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3368   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3369   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3370 
3371   PetscFunctionBegin;
3372   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3373     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3374   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3375 
3376   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3377   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3378   if (!rank) {
3379     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3380     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3381     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3382   }
3383 
3384   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3385   M = header[1]; N = header[2];
3386 
3387   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3388 
3389   /*
3390      This code adds extra rows to make sure the number of rows is
3391      divisible by the blocksize
3392   */
3393   Mbs        = M/bs;
3394   extra_rows = bs - M + bs*Mbs;
3395   if (extra_rows == bs) extra_rows = 0;
3396   else                  Mbs++;
3397   if (extra_rows && !rank) {
3398     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3399   }
3400 
3401   /* determine ownership of all rows */
3402   mbs        = Mbs/size + ((Mbs % size) > rank);
3403   m          = mbs*bs;
3404   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3405   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3406 
3407   /* process 0 needs enough room for process with most rows */
3408   if (!rank) {
3409     mmax = rowners[1];
3410     for (i=2; i<=size; i++) {  /* rowners[1..size] hold each process's block-row count */
3411       mmax = PetscMax(mmax,rowners[i]);
3412     }
3413     mmax*=bs;
3414   } else mmax = m;
3415 
3416   rowners[0] = 0;
3417   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3418   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3419   rstart = rowners[rank];
3420   rend   = rowners[rank+1];
3421 
3422   /* distribute row lengths to all processors */
3423   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3424   if (!rank) {
3425     mend = m;
3426     if (size == 1) mend = mend - extra_rows;
3427     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3428     for (j=mend; j<m; j++) locrowlens[j] = 1;
3429     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3430     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3431     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3432     for (j=0; j<m; j++) {
3433       procsnz[0] += locrowlens[j];
3434     }
3435     for (i=1; i<size; i++) {
3436       mend = browners[i+1] - browners[i];
3437       if (i == size-1) mend = mend - extra_rows;
3438       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3439       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3440       /* calculate the number of nonzeros on each processor */
3441       for (j=0; j<browners[i+1]-browners[i]; j++) {
3442         procsnz[i] += rowlengths[j];
3443       }
3444       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3445     }
3446     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3447   } else {
3448     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3449   }
3450 
3451   if (!rank) {
3452     /* determine max buffer needed and allocate it */
3453     maxnz = procsnz[0];
3454     for (i=1; i<size; i++) {
3455       maxnz = PetscMax(maxnz,procsnz[i]);
3456     }
3457     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3458 
3459     /* read in my part of the matrix column indices  */
3460     nz     = procsnz[0];
3461     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3462     mycols = ibuf;
3463     if (size == 1)  nz -= extra_rows;
3464     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3465     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3466 
3467     /* read in each other process's part (except the last) and ship it off */
3468     for (i=1; i<size-1; i++) {
3469       nz   = procsnz[i];
3470       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3471       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3472     }
3473     /* read in the stuff for the last proc */
3474     if (size != 1) {
3475       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3476       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3477       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3478       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3479     }
3480     ierr = PetscFree(cols);CHKERRQ(ierr);
3481   } else {
3482     /* determine buffer space needed for message */
3483     nz = 0;
3484     for (i=0; i<m; i++) {
3485       nz += locrowlens[i];
3486     }
3487     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3488     mycols = ibuf;
3489     /* receive message of column indices*/
3490     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3491     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3492     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3493   }
3494 
3495   /* loop over local rows, determining number of off diagonal entries */
3496   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3497   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3498   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3499   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3500   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3501   rowcount = 0; nzcount = 0;
3502   for (i=0; i<mbs; i++) {
3503     dcount  = 0;
3504     odcount = 0;
3505     for (j=0; j<bs; j++) {
3506       kmax = locrowlens[rowcount];
3507       for (k=0; k<kmax; k++) {
3508         tmp = mycols[nzcount++]/bs;
3509         if (!mask[tmp]) {
3510           mask[tmp] = 1;
3511           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3512           else masked1[dcount++] = tmp;
3513         }
3514       }
3515       rowcount++;
3516     }
3517 
3518     dlens[i]  = dcount;
3519     odlens[i] = odcount;
3520 
3521     /* zero out the mask elements we set */
3522     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3523     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3524   }
3525 
3526   /* create our matrix */
3527   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3528   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3529   ierr = MatSetType(A,type);CHKERRQ(ierr);
3530   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3531 
3532   if (!rank) {
3533     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3534     /* read in my part of the matrix numerical values  */
3535     nz = procsnz[0];
3536     vals = buf;
3537     mycols = ibuf;
3538     if (size == 1)  nz -= extra_rows;
3539     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3540     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3541 
3542     /* insert into matrix */
3543     jj      = rstart*bs;
3544     for (i=0; i<m; i++) {
3545       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3546       mycols += locrowlens[i];
3547       vals   += locrowlens[i];
3548       jj++;
3549     }
3550     /* read in other processors (except the last one) and ship out */
3551     for (i=1; i<size-1; i++) {
3552       nz   = procsnz[i];
3553       vals = buf;
3554       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3555       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3556     }
3557     /* the last proc */
3558     if (size != 1) {
3559       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3560       vals = buf;
3561       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3562       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3563       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3564     }
3565     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3566   } else {
3567     /* receive numeric values */
3568     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3569 
3570     /* receive message of values*/
3571     vals   = buf;
3572     mycols = ibuf;
3573     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3574     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3575     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3576 
3577     /* insert into matrix */
3578     jj      = rstart*bs;
3579     for (i=0; i<m; i++) {
3580       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3581       mycols += locrowlens[i];
3582       vals   += locrowlens[i];
3583       jj++;
3584     }
3585   }
3586   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3587   ierr = PetscFree(buf);CHKERRQ(ierr);
3588   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3589   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3590   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3591   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3592   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3593   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3594 
3595   *newmat = A;
3596   PetscFunctionReturn(0);
3597 }
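/*
   Loading sketch, kept out of compilation; the function name is hypothetical
   and the calling sequence assumed for MatLoad() is the one matching
   MatLoad_MPIBAIJ() above.  The option -matload_block_size selects the block
   size used while reading.
*/
#if 0
static PetscErrorCode ExampleLoad(MPI_Comm comm,const char *file,Mat *A)
{
  PetscViewer    viewer;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,file,FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIBAIJ,A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif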
3598 
3599 #undef __FUNCT__
3600 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3601 /*@
3602    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3603 
3604    Collective on Mat
3605 
3606    Input Parameters:
3607 +  mat  - the matrix
3608 -  fact - the factor used to compute the size of the hash table (the options-database path defaults to 1.39)
3609 
3610    Level: advanced
3611 
3612   Notes:
3613    This can also be set by the command line option: -mat_use_hash_table <fact>
3614 
3615 .keywords: matrix, hashtable, factor, HT
3616 
3617 .seealso: MatSetOption()
3618 @*/
3619 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3620 {
3621   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3622 
3623   PetscFunctionBegin;
3624   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3625   if (f) {
3626     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3627   }
3628   PetscFunctionReturn(0);
3629 }
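/*
   Sketch, kept out of compilation; the function name is hypothetical.  It
   enables hash-table assembly and tunes the factor for an MPIBAIJ matrix;
   note that the command-line path in MatCreate_MPIBAIJ() above replaces
   factors <= 1.0 with the default 1.39.
*/
#if 0
static PetscErrorCode ExampleUseHashTable(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif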
3630 
3631 EXTERN_C_BEGIN
3632 #undef __FUNCT__
3633 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3634 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3635 {
3636   Mat_MPIBAIJ *baij;
3637 
3638   PetscFunctionBegin;
3639   baij = (Mat_MPIBAIJ*)mat->data;
3640   baij->ht_fact = fact;
3641   PetscFunctionReturn(0);
3642 }
3643 EXTERN_C_END
3644 
3645 #undef __FUNCT__
3646 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3647 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3648 {
3649   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3650   PetscFunctionBegin;
3651   *Ad     = a->A;
3652   *Ao     = a->B;
3653   *colmap = a->garray;
3654   PetscFunctionReturn(0);
3655 }
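/*
   Sketch, kept out of compilation; the function name is hypothetical.  It
   accesses the two sequential pieces of an MPIBAIJ matrix: Ad is the diagonal
   portion, Ao the off-diagonal portion, and colmap (the garray) maps Ao's
   local block columns to global block columns.  All three reference internal
   data and must not be destroyed or freed by the caller.
*/
#if 0
static PetscErrorCode ExampleGetSeqBAIJ(Mat A)
{
  Mat            Ad,Ao;
  PetscInt       *colmap;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif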
3656 
3657 /*
3658     Special version for direct calls from Fortran (to eliminate two function-call overheads)
3659 */
3660 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3661 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3662 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3663 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3664 #endif
3665 
3666 #undef __FUNCT__
3667 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3668 /*@C
3669   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3670 
3671   Collective on Mat
3672 
3673   Input Parameters:
3674 + mat - the matrix
3675 . min - number of input rows
3676 . im - input rows
3677 . nin - number of input columns
3678 . in - input columns
3679 . v - numerical values input
3680 - addvin - INSERT_VALUES or ADD_VALUES
3681 
3682   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(); the duplication is unfortunate, but it avoids the extra function-call overhead.
3683 
3684   Level: advanced
3685 
3686 .seealso:   MatSetValuesBlocked()
3687 @*/
3688 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3689 {
3690   /* convert input arguments to C version */
3691   Mat             mat = *matin;
3692   PetscInt        m = *min, n = *nin;
3693   InsertMode      addv = *addvin;
3694 
3695   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3696   const MatScalar *value;
3697   MatScalar       *barray=baij->barray;
3698   PetscTruth      roworiented = baij->roworiented;
3699   PetscErrorCode  ierr;
3700   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3701   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3702   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3703 
3704   PetscFunctionBegin;
3705   /* tasks normally handled by MatSetValuesBlocked() */
3706   if (mat->insertmode == NOT_SET_VALUES) {
3707     mat->insertmode = addv;
3708   }
3709 #if defined(PETSC_USE_DEBUG)
3710   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3711   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3712 #endif
3713   if (mat->assembled) {
3714     mat->was_assembled = PETSC_TRUE;
3715     mat->assembled     = PETSC_FALSE;
3716   }
3717   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3718 
3719 
3720   if (!barray) {
3721     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3722     baij->barray = barray;
3723   }
3724 
3725   if (roworiented) {
3726     stepval = (n-1)*bs;
3727   } else {
3728     stepval = (m-1)*bs;
3729   }
3730   for (i=0; i<m; i++) {
3731     if (im[i] < 0) continue;
3732 #if defined(PETSC_USE_DEBUG)
3733     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3734 #endif
3735     if (im[i] >= rstart && im[i] < rend) {
3736       row = im[i] - rstart;
3737       for (j=0; j<n; j++) {
3738         /* If NumCol = 1 then a copy is not required */
3739         if ((roworiented) && (n == 1)) {
3740           barray = (MatScalar*)v + i*bs2;
3741         } else if ((!roworiented) && (m == 1)) {
3742           barray = (MatScalar*)v + j*bs2;
3743         } else { /* Here a copy is required */
3744           if (roworiented) {
3745             value = v + i*(stepval+bs)*bs + j*bs;
3746           } else {
3747             value = v + j*(stepval+bs)*bs + i*bs;
3748           }
3749           for (ii=0; ii<bs; ii++,value+=stepval) {
3750             for (jj=0; jj<bs; jj++) {
3751               *barray++  = *value++;
3752             }
3753           }
3754           barray -= bs2;
3755         }
3756 
3757         if (in[j] >= cstart && in[j] < cend){
3758           col  = in[j] - cstart;
3759           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3760         }
3761         else if (in[j] < 0) continue;
3762 #if defined(PETSC_USE_DEBUG)
3763         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3764 #endif
3765         else {
3766           if (mat->was_assembled) {
3767             if (!baij->colmap) {
3768               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3769             }
3770 
3771 #if defined(PETSC_USE_DEBUG)
3772 #if defined (PETSC_USE_CTABLE)
3773             { PetscInt data;
3774               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3775               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3776             }
3777 #else
3778             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3779 #endif
3780 #endif
3781 #if defined (PETSC_USE_CTABLE)
3782             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3783             col  = (col - 1)/bs;
3784 #else
3785             col = (baij->colmap[in[j]] - 1)/bs;
3786 #endif
3787             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3788               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3789               col =  in[j];
3790             }
3791           }
3792           else col = in[j];
3793           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3794         }
3795       }
3796     } else {
3797       if (!baij->donotstash) {
3798         if (roworiented) {
3799           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3800         } else {
3801           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3802         }
3803       }
3804     }
3805   }
3806 
3807   /* task normally handled by MatSetValuesBlocked() */
3808   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3809   PetscFunctionReturn(0);
3810 }
3811