xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision d5c9c0c4eebc2f2a01a1bd0c86fca87e2acd2a03)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
/*
   MatGetRowMaxAbs_MPIBAIJ - For each locally owned row, finds the entry with
   the largest absolute value and (optionally) its global column index.

   Computes the row-max of the diagonal block a->A, then merges in the row-max
   of the off-diagonal block a->B, translating the off-diagonal local column
   indices to global numbering through a->garray.

   Input Parameter:
.  A - the MATMPIBAIJ matrix

   Output Parameters:
+  v   - vector of the per-row maximum absolute values
-  idx - optional (may be NULL) array receiving global column indices of the maxima
*/
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* row-wise max |entry| of the diagonal block; indices come back in the
     block's local column numbering */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift diagonal-block indices to global column numbering; rows whose
       diagonal part is identically zero are left untranslated here
       (NOTE(review): such an idx entry stays in local numbering unless the
       off-diagonal block wins below -- confirm this is the intended contract) */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* same computation for the off-diagonal block, into scratch storage */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  /* per row, keep whichever block holds the larger magnitude; off-diagonal
     indices are mapped to global numbering via garray, which is indexed by
     block -- hence the /bs and %bs arithmetic */
  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non scalable way since the
73    length of colmap equals the global matrix length.
74 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table keyed by global block column + 1 (so that a
     lookup result of 0 can mean "absent"), mapping to 1 + the first point
     column of the corresponding local block in B */
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense variant: zeroed array over all global block columns; entry 0 means
     "no local block for this global block column", otherwise it holds
     1 + the first point column of the corresponding local block in B */
  ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
95 
/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol):
   inserts (or, for ADD_VALUES, accumulates) the scalar `value` at point
   position (row,col) -- local, point-wise (not block) indices -- of the
   diagonal block A.  A binary search narrows the block row to a window of at
   most 4 entries which is then scanned linearly; if the block column is not
   present, a new zeroed block is created (subject to a->nonew) and the later
   entries of the row are shifted up.  Values within a block are stored
   column-major (offset bs*cidx + ridx).  Relies on locals of the enclosing
   function (rp,ap,bap,aa,ai,aj,aimax,ailen,bs,bs2,low,high,t,_i,nrow,rmax,N,
   brow,bcol,ridx,cidx,ierr).  (orow,ocol) are global indices used only in
   error messages.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
131 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol):
   identical in structure to MatSetValues_SeqBAIJ_A_Private() but operates on
   the off-diagonal block B through the b-prefixed locals (ba,bi,bj,bimax,
   bilen,b->nonew).  (orow,ocol) are global indices used only in error
   messages.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
167 
/*
   MatSetValues_MPIBAIJ - Inserts or adds values, addressed by global
   point-wise (not block) indices, into a MATMPIBAIJ matrix.

   Locally owned entries are routed to the diagonal block (when the column is
   in the local column range) or to the off-diagonal block (translating the
   global column via the colmap); off-process rows are stashed for
   communication at assembly time.  Negative row/column indices are ignored.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative rows are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column in the local range: goes into the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
          SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        } else {
          /* off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* map global block column to local point column; the colmap stores
               1 + the value, so 0 (absent) becomes -1 after the subtraction */
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new off-diagonal block column: disassemble and fall back to
                 global indexing for B */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;  /* add the point offset within the block */
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
251 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - Inserts or adds one bs x bs block of
   values at block position (row,col) -- local block indices -- of a SeqBAIJ
   matrix.

   `v` holds bs*bs scalars; for row-oriented input they are transposed on the
   fly into the column-major block storage (inner loop strides by bs), for
   column-oriented input they are copied directly.  If the block column is not
   present a new block is created (subject to a->nonew) and later entries of
   the row are shifted up.  (orow,ocol) are the global block indices used only
   in error messages.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];        /* block column indices of this block row */
  ap   = aa + bs2*ai[row];    /* first numerical value of this block row */
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  /* binary search down to a window of at most 8 entries, then scan linearly */
  low = 0;
  high = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: add into or overwrite it */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* input is row-major, storage is column-major: stride by bs */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* input already column-major: copy column by column */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;  /* silently drop new blocks */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap +  bs2*i;
  /* fill the freshly created block (no zeroing needed: every entry is set) */
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
340 
341 /*
342     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
343     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
344 */
/*
   MatSetValuesBlocked_MPIBAIJ - Inserts or adds bs x bs blocks of values,
   addressed by global block indices, into a MATMPIBAIJ matrix.

   Each block destined for a locally owned block row is (if necessary) copied
   into the contiguous scratch buffer baij->barray, then routed to the
   diagonal or off-diagonal local block; blocks for off-process rows are
   stashed for assembly-time communication.  Negative indices are ignored.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs x bs scratch block, reused across calls */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance between consecutive rows (resp. columns) of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block rows are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;  /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the local range: diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) {
          SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
        } else {
          /* off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

            /* debug-only sanity check: a blocked colmap entry must be a
               multiple of bs (plus the +1 sentinel) */
#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
            /* translate global block column to local block column; 0 in the
               colmap (absent) becomes -1 after the subtraction */
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new off-diagonal block column: disassemble and use global index */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
438 
/* Multiplicative hashing a la Knuth: the key is multiplied by the fractional
   part of the golden ratio and the fractional part of the product is scaled
   to the table size.  `tmp' is a caller-supplied PetscReal scratch variable
   (the macro both assigns and reads it via the comma operator). */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues for
   MATMPIBAIJ: values (global point indices) may only land in blocks that
   already exist in the hash table built by
   MatCreateHashTable_MPIBAIJ_Private(); a missing entry is an error.
   Off-process rows are stashed as usual.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table: the key is 1 + the linearized global
           block position (block row * Nbs + block column) */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        /* linear probing from h1, wrapping around once; the debug variant
           additionally gathers search-length statistics */
        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address
           (blocks are stored column-major, hence (col%bs)*bs + row%bs) */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      /* off-process row: stash for assembly-time communication */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
509 
510 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
511 {
512   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
513   PetscBool         roworiented = baij->roworiented;
514   PetscErrorCode    ierr;
515   PetscInt          i,j,ii,jj,row,col;
516   PetscInt          rstart=baij->rstartbs;
517   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
518   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
519   PetscReal         tmp;
520   MatScalar         **HD = baij->hd,*baij_a;
521   const PetscScalar *v_t,*value;
522   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
523 
524   PetscFunctionBegin;
525   if (roworiented) stepval = (n-1)*bs;
526   else stepval = (m-1)*bs;
527 
528   for (i=0; i<m; i++) {
529     if (PetscDefined(USE_DEBUG)) {
530       if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
531       if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
532     }
533     row = im[i];
534     v_t = v + i*nbs2;
535     if (row >= rstart && row < rend) {
536       for (j=0; j<n; j++) {
537         col = in[j];
538 
539         /* Look up into the Hash Table */
540         key = row*Nbs+col+1;
541         h1  = HASH(size,key,tmp);
542 
543         idx = h1;
544         if (PetscDefined(USE_DEBUG)) {
545           total_ct++;
546           insert_ct++;
547           if (HT[idx] != key) {
548             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
549             if (idx == size) {
550               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
551               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
552             }
553           }
554         } else if (HT[idx] != key) {
555           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
556           if (idx == size) {
557             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
558             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
559           }
560         }
561         baij_a = HD[idx];
562         if (roworiented) {
563           /*value = v + i*(stepval+bs)*bs + j*bs;*/
564           /* value = v + (i*(stepval+bs)+j)*bs; */
565           value = v_t;
566           v_t  += bs;
567           if (addv == ADD_VALUES) {
568             for (ii=0; ii<bs; ii++,value+=stepval) {
569               for (jj=ii; jj<bs2; jj+=bs) {
570                 baij_a[jj] += *value++;
571               }
572             }
573           } else {
574             for (ii=0; ii<bs; ii++,value+=stepval) {
575               for (jj=ii; jj<bs2; jj+=bs) {
576                 baij_a[jj] = *value++;
577               }
578             }
579           }
580         } else {
581           value = v + j*(stepval+bs)*bs + i*bs;
582           if (addv == ADD_VALUES) {
583             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
584               for (jj=0; jj<bs; jj++) {
585                 baij_a[jj] += *value++;
586               }
587             }
588           } else {
589             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
590               for (jj=0; jj<bs; jj++) {
591                 baij_a[jj] = *value++;
592               }
593             }
594           }
595         }
596       }
597     } else {
598       if (!baij->donotstash) {
599         if (roworiented) {
600           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
601         } else {
602           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
603         }
604       }
605     }
606   }
607   if (PetscDefined(USE_DEBUG)) {
608     baij->ht_total_ct  += total_ct;
609     baij->ht_insert_ct += insert_ct;
610   }
611   PetscFunctionReturn(0);
612 }
613 
/*
   MatGetValues_MPIBAIJ - Retrieves values at global point positions
   (idxm x idxn).  Only rows owned by this process may be queried; columns
   outside the local column range are looked up through the colmap and return
   0.0 if no corresponding off-diagonal block exists.  Negative indices yield
   no entry (the corresponding v slot is left untouched).
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column in the local range: read from the diagonal block */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate via the colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* data < 0 (absent) or a stale mapping means there is no stored
             block for this column: the entry is an implicit zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;  /* local point column within B */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
654 
655 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
656 {
657   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
658   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
659   PetscErrorCode ierr;
660   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
661   PetscReal      sum = 0.0;
662   MatScalar      *v;
663 
664   PetscFunctionBegin;
665   if (baij->size == 1) {
666     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
667   } else {
668     if (type == NORM_FROBENIUS) {
669       v  = amat->a;
670       nz = amat->nz*bs2;
671       for (i=0; i<nz; i++) {
672         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
673       }
674       v  = bmat->a;
675       nz = bmat->nz*bs2;
676       for (i=0; i<nz; i++) {
677         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
678       }
679       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
680       *nrm = PetscSqrtReal(*nrm);
681     } else if (type == NORM_1) { /* max column sum */
682       PetscReal *tmp,*tmp2;
683       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
684       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
685       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
686       v    = amat->a; jj = amat->j;
687       for (i=0; i<amat->nz; i++) {
688         for (j=0; j<bs; j++) {
689           col = bs*(cstart + *jj) + j; /* column index */
690           for (row=0; row<bs; row++) {
691             tmp[col] += PetscAbsScalar(*v);  v++;
692           }
693         }
694         jj++;
695       }
696       v = bmat->a; jj = bmat->j;
697       for (i=0; i<bmat->nz; i++) {
698         for (j=0; j<bs; j++) {
699           col = bs*garray[*jj] + j;
700           for (row=0; row<bs; row++) {
701             tmp[col] += PetscAbsScalar(*v); v++;
702           }
703         }
704         jj++;
705       }
706       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
707       *nrm = 0.0;
708       for (j=0; j<mat->cmap->N; j++) {
709         if (tmp2[j] > *nrm) *nrm = tmp2[j];
710       }
711       ierr = PetscFree(tmp);CHKERRQ(ierr);
712       ierr = PetscFree(tmp2);CHKERRQ(ierr);
713     } else if (type == NORM_INFINITY) { /* max row sum */
714       PetscReal *sums;
715       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
716       sum  = 0.0;
717       for (j=0; j<amat->mbs; j++) {
718         for (row=0; row<bs; row++) sums[row] = 0.0;
719         v  = amat->a + bs2*amat->i[j];
720         nz = amat->i[j+1]-amat->i[j];
721         for (i=0; i<nz; i++) {
722           for (col=0; col<bs; col++) {
723             for (row=0; row<bs; row++) {
724               sums[row] += PetscAbsScalar(*v); v++;
725             }
726           }
727         }
728         v  = bmat->a + bs2*bmat->i[j];
729         nz = bmat->i[j+1]-bmat->i[j];
730         for (i=0; i<nz; i++) {
731           for (col=0; col<bs; col++) {
732             for (row=0; row<bs; row++) {
733               sums[row] += PetscAbsScalar(*v); v++;
734             }
735           }
736         }
737         for (row=0; row<bs; row++) {
738           if (sums[row] > sum) sum = sums[row];
739         }
740       }
741       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
742       ierr = PetscFree(sums);CHKERRQ(ierr);
743     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
744   }
745   PetscFunctionReturn(0);
746 }
747 
/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;   /* collision count and longest probe sequence */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);  /* table already built */

  /* table size is `factor` times the number of local nonzero blocks */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table: HT holds keys (0 = empty slot), HD holds
     pointers to the corresponding bs2 numerical values */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A: key is 1 + the linearized global block position
     (global block row * Nbs + global block column); collisions are resolved
     by linear probing with wrap-around */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: global block column obtained through garray */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary: occupancy and average/maximum probe lengths */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
839 
840 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
841 {
842   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
843   PetscErrorCode ierr;
844   PetscInt       nstash,reallocs;
845 
846   PetscFunctionBegin;
847   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
848 
849   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
850   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
851   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
852   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
853   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
854   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
855   PetscFunctionReturn(0);
856 }
857 
/*
   Completes assembly of an MPIBAIJ matrix: drains the point and block
   stashes into the local blocks, assembles A and B, synchronizes the
   disassembled state across processes, and builds the optional hash table.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* receive and insert the stashed point (scalar) entries, message by message */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* note: block values are bs2 scalars each, hence the i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is true only if every process was assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build lvec/Mvctx scatter used by MatMult */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, build the hash table and switch to the hash-table MatSetValues */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow work buffers; they are rebuilt lazily if needed */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
971 
972 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
973 #include <petscdraw.h>
/*
   Views an MPIBAIJ matrix on ASCII, draw, or socket viewers. For the INFO
   formats only summary data is printed; otherwise the whole matrix is
   gathered onto rank 0 and viewed there as a sequential matrix.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* print per-process storage statistics, one line per rank */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    /* a null draw context means nothing to render; bail out early */
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    /* rank 0 owns all rows of the gathered copy; other ranks own none */
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global (scalar) row indices of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        /* insert the bs x bs block one (column-oriented) column at a time */
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps B's compressed block column index to the global block column */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* on rank 0 the gathered matrix's diagonal block holds the whole matrix */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1091 
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
   Writes the matrix in PETSc's binary format: a 4-entry header
   (classid, M, N, global nz), the per-row (scalar) lengths, the column
   indices, then the values. Each block row is expanded into bs scalar rows
   with columns emitted in ascending global order: B entries left of the
   diagonal block, then A entries, then the remaining B entries.
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz); /* local scalar nonzero count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* header[3] becomes the global nonzero count on rank 0 (only rank 0 writes) */
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  /* every scalar row in block row i has the same length */
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      /* B blocks whose global block column lies left of the diagonal block */
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      /* the diagonal block (A) columns, shifted by the column start cs */
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs;
      /* remaining B blocks, right of the diagonal block */
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  /* blocks are stored column-oriented: scalar (k,l) of block j is a[bs*(bs*j+l)+k] */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1174 
1175 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1176 {
1177   PetscErrorCode ierr;
1178   PetscBool      iascii,isdraw,issocket,isbinary;
1179 
1180   PetscFunctionBegin;
1181   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1182   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1183   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1184   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1185   if (iascii || isdraw || issocket) {
1186     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1187   } else if (isbinary) {
1188     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1189   }
1190   PetscFunctionReturn(0);
1191 }
1192 
/*
   Destroys an MPIBAIJ matrix: frees the stashes, the sequential A/B blocks,
   the column map and scatter machinery, the hash table, and finally removes
   all composed type-conversion/option functions.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on the build configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* reset the type name and detach all composed methods from the object */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1235 
1236 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1237 {
1238   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1239   PetscErrorCode ierr;
1240   PetscInt       nt;
1241 
1242   PetscFunctionBegin;
1243   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1244   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1245   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1246   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1247   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1248   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1249   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1250   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1255 {
1256   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1262   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1268 {
1269   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1270   PetscErrorCode ierr;
1271 
1272   PetscFunctionBegin;
1273   /* do nondiagonal part */
1274   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1275   /* do local part */
1276   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1277   /* add partial results together */
1278   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1279   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1284 {
1285   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1286   PetscErrorCode ierr;
1287 
1288   PetscFunctionBegin;
1289   /* do nondiagonal part */
1290   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1291   /* do local part */
1292   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1293   /* add partial results together */
1294   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1295   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 /*
1300   This only works correctly for square matrices where the subblock A->A is the
1301    diagonal block
1302 */
1303 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1304 {
1305   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1306   PetscErrorCode ierr;
1307 
1308   PetscFunctionBegin;
1309   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1310   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1315 {
1316   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1317   PetscErrorCode ierr;
1318 
1319   PetscFunctionBegin;
1320   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1321   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
/*
   Returns the column indices and values of a locally owned (global scalar)
   row by merging the row of the diagonal block A with the row of the
   off-diagonal block B in ascending global column order. The returned
   arrays point into internal work buffers; callers must pair this with
   MatRestoreRow_MPIBAIJ.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  /* only one row may be "gotten" at a time since the work buffers are shared */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart; /* local scalar row index */

  /* pass NULL for outputs the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* copy B entries left of the diagonal block, then all of A, then the rest of B */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined by the value pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was NULL: find imark here while translating indices */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1401 
1402 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1403 {
1404   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1405 
1406   PetscFunctionBegin;
1407   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1408   baij->getrowactive = PETSC_FALSE;
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1413 {
1414   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1415   PetscErrorCode ierr;
1416 
1417   PetscFunctionBegin;
1418   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1419   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1424 {
1425   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1426   Mat            A  = a->A,B = a->B;
1427   PetscErrorCode ierr;
1428   PetscLogDouble isend[5],irecv[5];
1429 
1430   PetscFunctionBegin;
1431   info->block_size = (PetscReal)matin->rmap->bs;
1432 
1433   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1434 
1435   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1436   isend[3] = info->memory;  isend[4] = info->mallocs;
1437 
1438   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1439 
1440   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1441   isend[3] += info->memory;  isend[4] += info->mallocs;
1442 
1443   if (flag == MAT_LOCAL) {
1444     info->nz_used      = isend[0];
1445     info->nz_allocated = isend[1];
1446     info->nz_unneeded  = isend[2];
1447     info->memory       = isend[3];
1448     info->mallocs      = isend[4];
1449   } else if (flag == MAT_GLOBAL_MAX) {
1450     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1451 
1452     info->nz_used      = irecv[0];
1453     info->nz_allocated = irecv[1];
1454     info->nz_unneeded  = irecv[2];
1455     info->memory       = irecv[3];
1456     info->mallocs      = irecv[4];
1457   } else if (flag == MAT_GLOBAL_SUM) {
1458     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1459 
1460     info->nz_used      = irecv[0];
1461     info->nz_allocated = irecv[1];
1462     info->nz_unneeded  = irecv[2];
1463     info->memory       = irecv[3];
1464     info->mallocs      = irecv[4];
1465   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1466   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1467   info->fill_ratio_needed = 0;
1468   info->factor_mallocs    = 0;
1469   PetscFunctionReturn(0);
1470 }
1471 
/*
   Sets a matrix option, forwarding to the sequential A/B blocks where the
   option affects local storage, and recording it locally where it controls
   the parallel layer (stashing, hash table, row orientation).
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options that only affect the storage of the local blocks: forward to both */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* record here too: the parallel MatSetValues path reads this flag */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default table oversizing factor */
    break;
  /* symmetry-type options only need to reach the diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1519 
/*
   Forms the transpose by inserting each local bs x bs block at its
   transposed global position into a new (or reused) matrix B. For
   MAT_INPLACE_MATRIX the result replaces A via MatHeaderMerge.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* note the swapped local and global sizes for the transpose */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    /* rvals holds the bs consecutive global (scalar) row indices of block row i */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      /* insert each block column of A as a row of B: this transposes the block */
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* garray maps the compressed block column index to the global block column */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* in-place: replace A's guts with B's and destroy the shell of B */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1583 
/* Computes mat = diag(ll)*mat*diag(rr).  The right-scaling of the
   off-diagonal block needs ghosted values of rr, so the scatter is started
   first and finished only after the local scalings, overlapping
   communication with computation.  Either ll or rr may be NULL. */
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block; ll is purely local to this rank */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1614 
/* Zeros the listed global rows, optionally placing diag on the diagonal of
   each zeroed row and adjusting the right-hand side b so that x stays a
   solution of those rows (b_i = diag*x_i). */
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    /* fast path: diagonal entry of every local row lies inside l->A */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: the diagonal entry may fall in l->B, so zero
       l->A and insert each diagonal entry through the generic path */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* MatSetValues() above requires a fresh assembly */
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1673 
/* Zeros the listed global rows AND the matching columns.  The diagonal block
   is handled by MatZeroRowsColumns() on l->A; the off-diagonal block is
   manipulated directly through its raw BAIJ storage, using a scattered
   0/1 mask to identify ghost columns that must be eliminated.  If x and b
   are given, b is corrected for the eliminated off-process columns. */
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed; after the reduce, lrows[r] >= 0
     marks a local row requested (possibly by another rank) */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build a 0/1 mask over ghost entries: a ghost column must be zeroed iff
     its owning (global) row was zeroed somewhere */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost values of x are needed to correct b for eliminated columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    /* entries of point row (row%bs) are bs apart in the stored bs*bs blocks,
       consistent with the column-major block layout indexed here */
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          /* move the eliminated term to the right-hand side */
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1769 
1770 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1771 {
1772   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1773   PetscErrorCode ierr;
1774 
1775   PetscFunctionBegin;
1776   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1781 
1782 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1783 {
1784   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1785   Mat            a,b,c,d;
1786   PetscBool      flg;
1787   PetscErrorCode ierr;
1788 
1789   PetscFunctionBegin;
1790   a = matA->A; b = matA->B;
1791   c = matB->A; d = matB->B;
1792 
1793   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1794   if (flg) {
1795     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1796   }
1797   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1802 {
1803   PetscErrorCode ierr;
1804   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1805   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1806 
1807   PetscFunctionBegin;
1808   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1809   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1810     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1811   } else {
1812     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1813     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1814   }
1815   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1820 {
1821   PetscErrorCode ierr;
1822 
1823   PetscFunctionBegin;
1824   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1829 {
1830   PetscErrorCode ierr;
1831   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1832   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1833   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1834 
1835   PetscFunctionBegin;
1836   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
/* Computes Y = a*X + Y.  With identical nonzero patterns the stored values
   are combined directly with BLAS axpy; with a subset pattern the generic
   basic implementation is used; otherwise a new matrix preallocated for the
   merged pattern is built and swapped into Y. */
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: nz blocks of bs2 scalars each, combined in one axpy */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    /* off-diagonal blocks, same treatment */
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    /* build a new matrix preallocated for the union of both patterns */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* move B's contents into the Y object so callers' handle stays valid */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1883 
1884 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1885 {
1886   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1887   PetscErrorCode ierr;
1888 
1889   PetscFunctionBegin;
1890   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1891   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1896 {
1897   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1898   PetscErrorCode ierr;
1899 
1900   PetscFunctionBegin;
1901   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1902   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1907 {
1908   PetscErrorCode ierr;
1909   IS             iscol_local;
1910   PetscInt       csize;
1911 
1912   PetscFunctionBegin;
1913   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1914   if (call == MAT_REUSE_MATRIX) {
1915     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1916     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1917   } else {
1918     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1919   }
1920   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1921   if (call == MAT_INITIAL_MATRIX) {
1922     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1923     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1924   }
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 /*
1929   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1930   in local and then by concatenating the local matrices the end result.
1931   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1932   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1933 */
/* Worker for MatCreateSubMatrix_MPIBAIJ()/MatPermute_MPIBAIJ(): extracts the
   (isrow, iscol) submatrix.  A sequential local submatrix Mreuse is built
   first, then its rows are inserted into the parallel result M; on
   MAT_INITIAL_MATRIX Mreuse is stashed on M under "SubMatrix" for reuse. */
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The point-to-block compression and later expansion should be avoided:
     it does not flag invalid input and may change the indices, hence buggy */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* convert point counts to block counts */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    /* prefix-sum the local column counts to get this rank's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    /* this routine serves both BAIJ and SBAIJ; only the preallocation call
       matching M's actual type takes effect, the other is a no-op */
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    /* NOTE(review): ml is a point-row count while m is in block rows —
       confirm this check is intended to pass for bs > 1 */
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* values below are supplied in column-major block order */
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    /* cwork/vwork walk the CSR arrays of Mreuse one block row at a time */
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2058 
2059 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2060 {
2061   MPI_Comm       comm,pcomm;
2062   PetscInt       clocal_size,nrows;
2063   const PetscInt *rows;
2064   PetscMPIInt    size;
2065   IS             crowp,lcolp;
2066   PetscErrorCode ierr;
2067 
2068   PetscFunctionBegin;
2069   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2070   /* make a collective version of 'rowp' */
2071   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2072   if (pcomm==comm) {
2073     crowp = rowp;
2074   } else {
2075     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2076     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2077     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2078     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2079   }
2080   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2081   /* make a local version of 'colp' */
2082   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2083   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2084   if (size==1) {
2085     lcolp = colp;
2086   } else {
2087     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2088   }
2089   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2090   /* now we just get the submatrix */
2091   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2092   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2093   /* clean up */
2094   if (pcomm!=comm) {
2095     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2096   }
2097   if (size>1) {
2098     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2099   }
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2104 {
2105   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2106   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2107 
2108   PetscFunctionBegin;
2109   if (nghosts) *nghosts = B->nbs;
2110   if (ghosts) *ghosts = baij->garray;
2111   PetscFunctionReturn(0);
2112 }
2113 
/* Builds, on every rank, a sequential AIJ matrix describing the block-level
   nonzero structure of the entire parallel BAIJ matrix (numerical values are
   not set).  Row lengths and column indices are gathered with
   MPI_Allgatherv; symmetry flags of A are carried over to the result. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[i] = number of nonzero blocks in global block row i; each rank
     fills its own rows (diag + offdiag counts), then an Allgatherv
     completes the array everywhere */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  /* the local rows are written directly into B's j array at the position
     where this rank's rows land, merging diag and offdiag indices so each
     row's global column indices come out sorted */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate A's symmetry information to the structure matrix */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2232 
/* Applies local (block-Jacobi style) SOR sweeps.  Each outer iteration
   scatters the current ghost values of xx, forms bb1 = bb - B*x with the
   off-diagonal block, and runs the requested sweep of the diagonal block's
   SOR on bb1.  Only the SOR_LOCAL_* variants are supported in parallel. */
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is only needed when an iteration starts from a nonzero guess;
     note ~flag & SOR_ZERO_INITIAL_GUESS parses as (~flag) & ..., i.e. true
     when the zero-initial-guess bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* the first iteration with a zero guess needs no ghost update */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  /* VecDestroy() on a NULL Vec is a no-op, so this is safe when bb1 was never created */
  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2303 
2304 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2305 {
2306   PetscErrorCode ierr;
2307   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2308   PetscInt       N,i,*garray = aij->garray;
2309   PetscInt       ib,jb,bs = A->rmap->bs;
2310   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2311   MatScalar      *a_val = a_aij->a;
2312   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2313   MatScalar      *b_val = b_aij->a;
2314   PetscReal      *work;
2315 
2316   PetscFunctionBegin;
2317   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2318   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2319   if (type == NORM_2) {
2320     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2321       for (jb=0; jb<bs; jb++) {
2322         for (ib=0; ib<bs; ib++) {
2323           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2324           a_val++;
2325         }
2326       }
2327     }
2328     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2329       for (jb=0; jb<bs; jb++) {
2330         for (ib=0; ib<bs; ib++) {
2331           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2332           b_val++;
2333         }
2334       }
2335     }
2336   } else if (type == NORM_1) {
2337     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2338       for (jb=0; jb<bs; jb++) {
2339         for (ib=0; ib<bs; ib++) {
2340           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2341           a_val++;
2342         }
2343       }
2344     }
2345     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2346       for (jb=0; jb<bs; jb++) {
2347        for (ib=0; ib<bs; ib++) {
2348           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2349           b_val++;
2350         }
2351       }
2352     }
2353   } else if (type == NORM_INFINITY) {
2354     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2355       for (jb=0; jb<bs; jb++) {
2356         for (ib=0; ib<bs; ib++) {
2357           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2358           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2359           a_val++;
2360         }
2361       }
2362     }
2363     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2364       for (jb=0; jb<bs; jb++) {
2365         for (ib=0; ib<bs; ib++) {
2366           int col = garray[b_aij->j[i]] * bs + jb;
2367           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2368           b_val++;
2369         }
2370       }
2371     }
2372   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2373   if (type == NORM_INFINITY) {
2374     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2375   } else {
2376     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2377   }
2378   ierr = PetscFree(work);CHKERRQ(ierr);
2379   if (type == NORM_2) {
2380     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2386 {
2387   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2392   A->factorerrortype             = a->A->factorerrortype;
2393   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2394   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2399 {
2400   PetscErrorCode ierr;
2401   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2402   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2403 
2404   PetscFunctionBegin;
2405   if (!Y->preallocated) {
2406     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2407   } else if (!aij->nz) {
2408     PetscInt nonew = aij->nonew;
2409     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2410     aij->nonew = nonew;
2411   }
2412   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2417 {
2418   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2419   PetscErrorCode ierr;
2420 
2421   PetscFunctionBegin;
2422   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2423   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2424   if (d) {
2425     PetscInt rstart;
2426     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2427     *d += rstart/A->rmap->bs;
2428 
2429   }
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2434 {
2435   PetscFunctionBegin;
2436   *a = ((Mat_MPIBAIJ*)A->data)->A;
2437   PetscFunctionReturn(0);
2438 }
2439 
/* -------------------------------------------------------------------*/
/*
   MatOps_Values - the function-pointer table installed on every MPIBAIJ
   matrix by MatCreate_MPIBAIJ(). Entries are positional (the struct _MatOps
   slot numbers are noted in the comments); a 0 means the operation is not
   implemented for this type. Do not reorder entries.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ 0,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       0,
                                       MatZeroRowsColumns_MPIBAIJ,
                                /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       0,
                                       0,
                                /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*74*/ 0,
                                       MatFDColoringApply_BAIJ,
                                       0,
                                       0,
                                       0,
                                /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPIBAIJ,
                                /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*104*/0,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPIBAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
                                       0,
                                       MatGetGhosts_MPIBAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                /*124*/0,
                                       MatGetColumnNorms_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_Default,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
};
2587 
2588 
2589 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2590 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2591 
/*
   MatMPIBAIJSetPreallocationCSR_MPIBAIJ - type-specific implementation of
   MatMPIBAIJSetPreallocationCSR(): preallocates B from block-CSR data
   (ii,jj in block rows/columns of size bs) and, if V is given, inserts the
   numerical values. See the manual page below for the value-ordering rules.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  /* fix the block size and finalize the layouts so the block counts below are valid */
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;          /* local number of block rows */
  rstart = B->rmap->rstart/bs;     /* first global block row owned here */
  cstart = B->cmap->rstart/bs;     /* [cstart,cend) are the locally owned block columns */
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  /* count diagonal- vs off-diagonal-block nonzeros per block row for preallocation */
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* with no user values, insert zeros from a scratch buffer large enough for the widest row */
  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* all inserted entries are locally owned, so skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  /* the CSR pattern is exact: flag any later new nonzero as an error */
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2663 
2664 /*@C
2665    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2666 
2667    Collective
2668 
2669    Input Parameters:
2670 +  B - the matrix
2671 .  bs - the block size
2672 .  i - the indices into j for the start of each local row (starts with zero)
2673 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2674 -  v - optional values in the matrix
2675 
2676    Level: advanced
2677 
2678    Notes:
2679     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2680    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2681    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2682    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2683    block column and the second index is over columns within a block.
2684 
2685    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2686 
2687 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2688 @*/
/* Public entry point documented in the manual page above; validates arguments and
   dispatches to the type-specific implementation (a no-op for types that do not
   provide MatMPIBAIJSetPreallocationCSR_C, per PetscTryMethod semantics). */
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);   /* bs must be the same on all ranks */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2700 
2701 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2702 {
2703   Mat_MPIBAIJ    *b;
2704   PetscErrorCode ierr;
2705   PetscInt       i;
2706   PetscMPIInt    size;
2707 
2708   PetscFunctionBegin;
2709   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2710   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2711   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2712   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2713 
2714   if (d_nnz) {
2715     for (i=0; i<B->rmap->n/bs; i++) {
2716       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2717     }
2718   }
2719   if (o_nnz) {
2720     for (i=0; i<B->rmap->n/bs; i++) {
2721       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2722     }
2723   }
2724 
2725   b      = (Mat_MPIBAIJ*)B->data;
2726   b->bs2 = bs*bs;
2727   b->mbs = B->rmap->n/bs;
2728   b->nbs = B->cmap->n/bs;
2729   b->Mbs = B->rmap->N/bs;
2730   b->Nbs = B->cmap->N/bs;
2731 
2732   for (i=0; i<=b->size; i++) {
2733     b->rangebs[i] = B->rmap->range[i]/bs;
2734   }
2735   b->rstartbs = B->rmap->rstart/bs;
2736   b->rendbs   = B->rmap->rend/bs;
2737   b->cstartbs = B->cmap->rstart/bs;
2738   b->cendbs   = B->cmap->rend/bs;
2739 
2740 #if defined(PETSC_USE_CTABLE)
2741   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2742 #else
2743   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2744 #endif
2745   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2746   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2747   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2748 
2749   /* Because the B will have been resized we simply destroy it and create a new one each time */
2750   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2751   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2752   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2753   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2754   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2755   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2756 
2757   if (!B->preallocated) {
2758     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2759     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2760     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2761     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2762     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2763   }
2764 
2765   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2766   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2767   B->preallocated  = PETSC_TRUE;
2768   B->was_assembled = PETSC_FALSE;
2769   B->assembled     = PETSC_FALSE;
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2774 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2775 
/*
   MatConvert_MPIBAIJ_MPIAdj - builds an MPIAdj (adjacency) matrix from the
   block structure of B: one vertex per block row, one edge per nonzero block,
   with the diagonal block entries removed (MPIAdj graphs for partitioning do
   not carry self-edges). Ownership of the ii/jj arrays passes to the MPIAdj.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: row pointers = diag + offdiag counts, minus 1 if the row has a diagonal entry */
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* second pass: emit columns in globally ascending order --
     off-diagonal blocks left of the diagonal range, then diagonal-piece blocks
     (skipping the diagonal itself), then the remaining off-diagonal blocks.
     garray maps off-diagonal local block columns to global block columns and
     is sorted, so each segment is already ordered. */
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2815 
2816 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2817 
2818 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2819 
/*
   MatConvert_MPIBAIJ_MPIAIJ - converts an assembled MPIBAIJ matrix to MPIAIJ by
   converting its sequential diagonal and off-diagonal pieces with
   MatConvert_SeqBAIJ_SeqAIJ().

   NOTE(review): in the non-reuse path the input A is temporarily disassembled
   (so its off-diagonal piece uses global column indices during conversion) and
   then re-assembled, i.e. A is mutated and restored -- confirm callers tolerate
   this on error paths.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create the target with matching layout; empty preallocation since the
       sequential pieces are replaced wholesale below */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing sequential pieces of B in place */
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    /* disassemble so a->B carries global column indices, convert, then restore A */
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2864 
2865 /*MC
2866    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2867 
2868    Options Database Keys:
2869 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2870 . -mat_block_size <bs> - set the blocksize used to store the matrix
2871 - -mat_use_hash_table <fact>
2872 
2873    Level: beginner
2874 
2875    Notes:
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2878 
2879 .seealso: MatCreateBAIJ
2880 M*/
2881 
2882 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2883 
/*
   MatCreate_MPIBAIJ - constructor registered for MATMPIBAIJ: installs the ops
   table, initializes the Mat_MPIBAIJ implementation struct, composes the
   type-specific methods, and processes the hash-table options.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* compose the type-specific methods looked up by name elsewhere in PETSc */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  /* -mat_use_hash_table is queried twice: once as a flag, once (if set) for its
     real-valued growth factor; values <= 1.0 fall back to the default 1.39 */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2965 
2966 /*MC
2967    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2968 
2969    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2970    and MATMPIBAIJ otherwise.
2971 
2972    Options Database Keys:
2973 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2974 
2975   Level: beginner
2976 
2977 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2978 M*/
2979 
2980 /*@C
2981    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2982    (block compressed row).  For good matrix assembly performance
2983    the user should preallocate the matrix storage by setting the parameters
2984    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2985    performance can be increased by more than a factor of 50.
2986 
2987    Collective on Mat
2988 
2989    Input Parameters:
2990 +  B - the matrix
2991 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2992           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2993 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2994            submatrix  (same for all local rows)
2995 .  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
2997            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
2998            set it even if it is zero.
2999 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3000            submatrix (same for all local rows).
3001 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3002            off-diagonal portion of the local submatrix (possibly different for
3003            each block row) or NULL.
3004 
3005    If the *_nnz parameter is given then the *_nz parameter is ignored
3006 
3007    Options Database Keys:
3008 +   -mat_block_size - size of the blocks to use
3009 -   -mat_use_hash_table <fact>
3010 
3011    Notes:
3012    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3014 
3015    Storage Information:
3016    For a square global matrix we define each processor's diagonal portion
3017    to be its local rows and the corresponding columns (a square submatrix);
3018    each processor's off-diagonal portion encompasses the remainder of the
3019    local matrix (a rectangular submatrix).
3020 
3021    The user can specify preallocated storage for the diagonal part of
3022    the local submatrix with either d_nz or d_nnz (not both).  Set
3023    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3024    memory allocation.  Likewise, specify preallocated storage for the
3025    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3026 
3027    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3028    the figure below we depict these three local rows and all columns (0-11).
3029 
3030 .vb
3031            0 1 2 3 4 5 6 7 8 9 10 11
3032           --------------------------
3033    row 3  |o o o d d d o o o o  o  o
3034    row 4  |o o o d d d o o o o  o  o
3035    row 5  |o o o d d d o o o o  o  o
3036           --------------------------
3037 .ve
3038 
3039    Thus, any entries in the d locations are stored in the d (diagonal)
3040    submatrix, and any entries in the o locations are stored in the
3041    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3042    stored simply in the MATSEQBAIJ format for compressed row storage.
3043 
3044    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3045    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3046    In general, for PDE problems in which most nonzeros are near the diagonal,
3047    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3048    or you will get TERRIBLE performance; see the users' manual chapter on
3049    matrices.
3050 
3051    You can call MatGetInfo() to get information on how effective the preallocation was;
3052    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3053    You can also run with the option -info and look for messages with the string
3054    malloc in them to see if additional memory allocation was needed.
3055 
3056    Level: intermediate
3057 
3058 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3059 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Validate the matrix object and require bs to be the same value on all ranks sharing B */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* Dispatch to the type-specific implementation composed on B (silently a no-op
     for matrix types that do not provide "MatMPIBAIJSetPreallocation_C") */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3071 
3072 /*@C
3073    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3074    (block compressed row).  For good matrix assembly performance
3075    the user should preallocate the matrix storage by setting the parameters
3076    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3077    performance can be increased by more than a factor of 50.
3078 
3079    Collective
3080 
3081    Input Parameters:
3082 +  comm - MPI communicator
3083 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3084           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3085 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3086            This value should be the same as the local size used in creating the
3087            y vector for the matrix-vector product y = Ax.
3088 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3089            This value should be the same as the local size used in creating the
3090            x vector for the matrix-vector product y = Ax.
3091 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3092 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3093 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3094            submatrix  (same for all local rows)
3095 .  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
3097            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3098            and set it even if it is zero.
3099 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3100            submatrix (same for all local rows).
3101 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3102            off-diagonal portion of the local submatrix (possibly different for
3103            each block row) or NULL.
3104 
3105    Output Parameter:
3106 .  A - the matrix
3107 
3108    Options Database Keys:
3109 +   -mat_block_size - size of the blocks to use
3110 -   -mat_use_hash_table <fact>
3111 
3112    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3113    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3114    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3115 
3116    Notes:
3117    If the *_nnz parameter is given then the *_nz parameter is ignored
3118 
   A nonzero block is any block that has 1 or more nonzeros in it
3120 
3121    The user MUST specify either the local or global matrix dimensions
3122    (possibly both).
3123 
3124    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3126 
3127    Storage Information:
3128    For a square global matrix we define each processor's diagonal portion
3129    to be its local rows and the corresponding columns (a square submatrix);
3130    each processor's off-diagonal portion encompasses the remainder of the
3131    local matrix (a rectangular submatrix).
3132 
3133    The user can specify preallocated storage for the diagonal part of
3134    the local submatrix with either d_nz or d_nnz (not both).  Set
3135    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3136    memory allocation.  Likewise, specify preallocated storage for the
3137    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3138 
3139    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3140    the figure below we depict these three local rows and all columns (0-11).
3141 
3142 .vb
3143            0 1 2 3 4 5 6 7 8 9 10 11
3144           --------------------------
3145    row 3  |o o o d d d o o o o  o  o
3146    row 4  |o o o d d d o o o o  o  o
3147    row 5  |o o o d d d o o o o  o  o
3148           --------------------------
3149 .ve
3150 
3151    Thus, any entries in the d locations are stored in the d (diagonal)
3152    submatrix, and any entries in the o locations are stored in the
3153    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3154    stored simply in the MATSEQBAIJ format for compressed row storage.
3155 
3156    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3157    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3158    In general, for PDE problems in which most nonzeros are near the diagonal,
3159    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3160    or you will get TERRIBLE performance; see the users' manual chapter on
3161    matrices.
3162 
3163    Level: intermediate
3164 
3165 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3166 @*/
3167 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3168 {
3169   PetscErrorCode ierr;
3170   PetscMPIInt    size;
3171 
3172   PetscFunctionBegin;
3173   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3174   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3175   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3176   if (size > 1) {
3177     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3178     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3179   } else {
3180     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3181     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3182   }
3183   PetscFunctionReturn(0);
3184 }
3185 
/* Duplicate an MPIBAIJ matrix: creates a new matrix of the same type/layout,
   copies all scalar bookkeeping fields from the old Mat_MPIBAIJ, deep-copies the
   colmap/garray arrays, and duplicates the diagonal (A) and off-diagonal (B)
   sequential blocks (values copied or not depending on cpvalues). */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* mark the duplicate as fully set up and assembled, mirroring the source */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size bookkeeping (counts are in blocks, not scalar entries) */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the layouts by reference rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch state starts empty in the duplicate */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag/factor) but not the table
     itself or its statistics; it is rebuilt on demand */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
  /* deep-copy the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* deep-copy garray (global block-column indices of B), sized by B's block columns */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  /* fresh block stash; copy the ghost vector and scatter context */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* duplicate the two sequential blocks; cpvalues decides whether values are copied */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3268 
3269 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3270 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3271 {
3272   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3273   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3274   PetscScalar    *matvals;
3275   PetscErrorCode ierr;
3276 
3277   PetscFunctionBegin;
3278   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3279 
3280   /* read in matrix header */
3281   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3282   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3283   M  = header[1]; N = header[2]; nz = header[3];
3284   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3285   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3286   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3287 
3288   /* set block sizes from the viewer's .info file */
3289   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3290   /* set local sizes if not set already */
3291   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3292   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3293   /* set global sizes if not set already */
3294   if (mat->rmap->N < 0) mat->rmap->N = M;
3295   if (mat->cmap->N < 0) mat->cmap->N = N;
3296   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3297   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3298 
3299   /* check if the matrix sizes are correct */
3300   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3301   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3302   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3303   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3304   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);
3305   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);
3306   mbs = m/bs; nbs = n/bs;
3307 
3308   /* read in row lengths and build row indices */
3309   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3310   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3311   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3312   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3313   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3314 
3315   /* read in column indices and matrix values */
3316   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3317   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3318   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3319 
3320   { /* preallocate matrix storage */
3321     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3322     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3323     PetscBool  sbaij,done;
3324     PetscInt   *d_nnz,*o_nnz;
3325 
3326     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3327     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3328     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3329     ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
3330     for (i=0; i<mbs; i++) {
3331       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3332       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3333       for (k=0; k<bs; k++) {
3334         PetscInt row = bs*i + k;
3335         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3336           PetscInt col = colidxs[j];
3337           if (!sbaij || col >= row) {
3338             if (col >= cs && col < ce) {
3339               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3340             } else {
3341               ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
3342               if (done) o_nnz[i]++;
3343             }
3344           }
3345         }
3346       }
3347     }
3348     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3349     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3350     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3351     ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3352     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3353   }
3354 
3355   /* store matrix values */
3356   for (i=0; i<m; i++) {
3357     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3358     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3359   }
3360 
3361   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3362   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3363   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3364   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3365   PetscFunctionReturn(0);
3366 }
3367 
3368 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3369 {
3370   PetscErrorCode ierr;
3371   PetscBool      isbinary;
3372 
3373   PetscFunctionBegin;
3374   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3375   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3376   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3377   PetscFunctionReturn(0);
3378 }
3379 
3380 /*@
3381    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3382 
3383    Input Parameters:
3384 +  mat  - the matrix
3385 -  fact - factor
3386 
3387    Not Collective, each process can use a different factor
3388 
3389    Level: advanced
3390 
3391   Notes:
3392    This can also be set by the command line option: -mat_use_hash_table <fact>
3393 
3394 .seealso: MatSetOption()
3395 @*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific setter composed on mat; a no-op for matrix
     types that do not provide "MatSetHashTableFactor_C" */
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3404 
3405 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3406 {
3407   Mat_MPIBAIJ *baij;
3408 
3409   PetscFunctionBegin;
3410   baij          = (Mat_MPIBAIJ*)mat->data;
3411   baij->ht_fact = fact;
3412   PetscFunctionReturn(0);
3413 }
3414 
3415 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3416 {
3417   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3418   PetscBool      flg;
3419   PetscErrorCode ierr;
3420 
3421   PetscFunctionBegin;
3422   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3423   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3424   if (Ad)     *Ad     = a->A;
3425   if (Ao)     *Ao     = a->B;
3426   if (colmap) *colmap = a->garray;
3427   PetscFunctionReturn(0);
3428 }
3429 
3430 /*
3431     Special version for direct calls from Fortran (to eliminate two function call overheads
3432 */
3433 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3434 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3435 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3436 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3437 #endif
3438 
3439 /*@C
3440   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3441 
3442   Collective on Mat
3443 
3444   Input Parameters:
3445 + mat - the matrix
3446 . min - number of input rows
3447 . im - input rows
3448 . nin - number of input columns
3449 . in - input columns
3450 . v - numerical values input
3451 - addvin - INSERT_VALUES or ADD_VALUES
3452 
3453   Notes:
3454     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3455 
3456   Level: advanced
3457 
3458 .seealso:   MatSetValuesBlocked()
3459 @*/
/* Direct Fortran entry point duplicating MatSetValuesBlocked_MPIBAIJ: inserts
   bs-by-bs blocks either into the local diagonal (A) or off-diagonal (B)
   sequential block, or stashes them for other ranks during assembly. */
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;   /* scratch buffer for one bs*bs block */
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);


  /* lazily allocate the per-matrix block scratch buffer */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride between consecutive rows of a block within v, depending on orientation */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block-row indices are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned locally */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs*bs block into the contiguous scratch buffer */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column falls in the diagonal block A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
        else {
          /* off-diagonal block B: translate the global block column through colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* column not present in B's pattern: disassemble so new entries can be added */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another rank: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3571 
3572 /*@
3573      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3574          CSR format the local rows.
3575 
3576    Collective
3577 
3578    Input Parameters:
3579 +  comm - MPI communicator
3580 .  bs - the block size, only a block size of 1 is supported
3581 .  m - number of local rows (Cannot be PETSC_DECIDE)
3582 .  n - This value should be the same as the local size used in creating the
3583        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3584        calculated if N is given) For square matrices n is almost always m.
3585 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3586 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3587 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3588 .   j - column indices
3589 -   a - matrix values
3590 
3591    Output Parameter:
3592 .   mat - the matrix
3593 
3594    Level: intermediate
3595 
3596    Notes:
3597        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3598      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3599      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3600 
3601      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3602      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3603      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3604      with column-major ordering within blocks.
3605 
3606        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3607 
3608 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3609           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3610 @*/
3611 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3612 {
3613   PetscErrorCode ierr;
3614 
3615   PetscFunctionBegin;
3616   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3617   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3618   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3619   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3620   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3621   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3622   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3623   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3624   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3625   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3626   PetscFunctionReturn(0);
3627 }
3628 
3629 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3630 {
3631   PetscErrorCode ierr;
3632   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3633   PetscInt       *indx;
3634   PetscScalar    *values;
3635 
3636   PetscFunctionBegin;
3637   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3638   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3639     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3640     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3641     PetscInt       *bindx,rmax=a->rmax,j;
3642     PetscMPIInt    rank,size;
3643 
3644     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3645     mbs = m/bs; Nbs = N/cbs;
3646     if (n == PETSC_DECIDE) {
3647       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3648     }
3649     nbs = n/cbs;
3650 
3651     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3652     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3653 
3654     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3655     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3656     if (rank == size-1) {
3657       /* Check sum(nbs) = Nbs */
3658       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3659     }
3660 
3661     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3662     for (i=0; i<mbs; i++) {
3663       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3664       nnz = nnz/bs;
3665       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3666       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3667       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3668     }
3669     ierr = PetscFree(bindx);CHKERRQ(ierr);
3670 
3671     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3672     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3673     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3674     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3675     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3676     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3677     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3678   }
3679 
3680   /* numeric phase */
3681   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3682   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3683 
3684   for (i=0; i<m; i++) {
3685     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3686     Ii   = i + rstart;
3687     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3688     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3689   }
3690   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692   PetscFunctionReturn(0);
3693 }
3694