xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision b45e3bf4ff73d80a20c3202b6cd9f79d2f2d3efe)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode    ierr;
15   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
16   PetscScalar       *va,*vv;
17   Vec               vB,vA;
18   const PetscScalar *vb;
19 
20   PetscFunctionBegin;
21   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
22   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
23 
24   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
25   if (idx) {
26     for (i=0; i<m; i++) {
27       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
28     }
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
32   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
33   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
34 
35   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
36   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
37   for (i=0; i<m; i++) {
38     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
39       vv[i] = vb[i];
40       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
41     } else {
42       vv[i] = va[i];
43       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
44         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
45     }
46   }
47   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
48   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
49   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
50   ierr = PetscFree(idxb);CHKERRQ(ierr);
51   ierr = VecDestroy(&vA);CHKERRQ(ierr);
52   ierr = VecDestroy(&vB);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
57 {
58   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
63   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79      Local utility routine that creates a mapping from the global column
80    number to the local number in the off-diagonal part of the local
81    storage of the matrix.  This is done in a non scalable way since the
82    length of colmap equals the global matrix length.
83 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;  /* off-diagonal block */
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;      /* nbs = number of block columns in B */

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* Scalable variant: hash table mapping global block column (+1) to local
     point column (+1); the +1 shift distinguishes "absent" (0) from index 0 */
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* Non-scalable variant: dense array of length Nbs+1, zeroed so that an
     entry of 0 means "block column not present locally" */
  ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
104 
/*
   MatSetValues_SeqBAIJ_A_Private - Statement macro inserting/adding the scalar
   `value` at local POINT indices (row,col) of the diagonal block A.  The
   containing bs*bs block in block row `brow` is located by binary search
   narrowed to a window of <= 3 entries, then a linear scan.  If the block is
   absent and a->nonew allows it, storage is reallocated and later entries in
   the row are shifted up.  Relies on locals declared by the caller
   (rp,ap,aa,ai,aj,aimax,ailen,bs,bs2,nrow,low,high,...); orow/ocol are the
   original global indices, used only in error messages.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheckFalse(a->nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
140 
/*
   MatSetValues_SeqBAIJ_B_Private - Identical in structure to the A-variant
   above, but operates on the off-diagonal block B (rp/ap/bimax/bilen/bj/ba and
   b->nonew).  Inserts/adds `value` at local POINT indices (row,col) of B,
   reallocating and shifting when a new bs*bs block must be created.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheckFalse(b->nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
176 
/*
   MatSetValues_MPIBAIJ - Insert or add values given by global POINT (not block)
   row/column indices.  Locally owned rows go directly into the diagonal (A) or
   off-diagonal (B) sequential block via the MatSetValues_SeqBAIJ_{A,B}_Private()
   macros; off-process rows are buffered in mat->stash for communication at
   assembly time.  The macros depend on many of the locals declared below.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;  /* owned point-row range */
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal (A) block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        /* NOTE(review): the else-chain below relies on PetscCheckFalse()
           expanding to a bare if statement */
        else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
        else { /* off-diagonal (B) block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            /* colmap stores local point column + 1; 0 means "not present" */
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new off-process block column: disassemble and use global indexing */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else PetscCheckFalse(col < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            else col += in[j]%bs; /* add point offset within the block */
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
259 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - Insert or add one bs*bs dense block `v`
   at local BLOCK position (row,col) of a SeqBAIJ matrix.  The block is located
   by binary search narrowed to <= 7 entries plus a linear scan; if absent and
   a->nonew permits it, storage is reallocated and later entries shifted up.
   When roworiented, v (row-major) is scattered with stride bs into the
   column-major block storage; otherwise it is copied column by column.
   orow/ocol are the caller's original global block indices (error messages only).
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];       /* block column indices of this block row */
  ap   = aa + bs2*ai[row];   /* values of this block row */
  rmax = imax[row];          /* allocated blocks in this row */
  nrow = ailen[row];         /* used blocks in this row */
  value = v;                 /* (redundant re-assignment; value already == v) */
  low = 0;
  high = nrow;
  /* binary search narrows [low,high) to at most 7 candidates */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block exists: insert/add in place */
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently ignore new blocks */
  PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++; /* N = old last index; widen window for the new slot */
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
348 
349 /*
350     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
351     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
352 */
/*
   MatSetValuesBlocked_MPIBAIJ - Insert or add bs*bs blocks given by global BLOCK
   row/column indices.  Locally owned block rows are routed to the diagonal (A)
   or off-diagonal (B) sequential block via MatSetValuesBlocked_SeqBAIJ_Inlined();
   off-process block rows are buffered in mat->bstash.  When the input layout is
   not already one contiguous block per entry, each block is first copied into
   the scratch buffer baij->barray.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the one-block scratch buffer */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance between consecutive rows of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored */
    PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        /* NOTE(review): these casts alias v as non-const MatScalar*; the callee
           takes const, so v is not written through barray here */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal (A) block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        /* NOTE(review): else-chain relies on PetscCheck() expanding to a bare if */
        else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
        else { /* off-diagonal (B) block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              PetscCheckFalse((data - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            PetscCheckFalse((baij->colmap[in[j]] - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            /* colmap stores local point column + 1; convert to block column */
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new off-process block column: disassemble, use global indexing */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else PetscCheckFalse(col < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      PetscCheckFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
445 
/* Multiplicative (Fibonacci) hashing: HASH_KEY is the fractional part of the
   golden ratio; the fractional part of key*HASH_KEY, scaled by the table size,
   gives an index in [0,size). */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
450 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
451 {
452   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
453   PetscBool      roworiented = baij->roworiented;
454   PetscErrorCode ierr;
455   PetscInt       i,j,row,col;
456   PetscInt       rstart_orig=mat->rmap->rstart;
457   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
458   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
459   PetscReal      tmp;
460   MatScalar      **HD = baij->hd,value;
461   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
462 
463   PetscFunctionBegin;
464   for (i=0; i<m; i++) {
465     if (PetscDefined(USE_DEBUG)) {
466       PetscCheckFalse(im[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
467       PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
468     }
469     row = im[i];
470     if (row >= rstart_orig && row < rend_orig) {
471       for (j=0; j<n; j++) {
472         col = in[j];
473         if (roworiented) value = v[i*n+j];
474         else             value = v[i+j*m];
475         /* Look up PetscInto the Hash Table */
476         key = (row/bs)*Nbs+(col/bs)+1;
477         h1  = HASH(size,key,tmp);
478 
479         idx = h1;
480         if (PetscDefined(USE_DEBUG)) {
481           insert_ct++;
482           total_ct++;
483           if (HT[idx] != key) {
484             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
485             if (idx == size) {
486               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
487               PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
488             }
489           }
490         } else if (HT[idx] != key) {
491           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
492           if (idx == size) {
493             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
494             PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
495           }
496         }
497         /* A HASH table entry is found, so insert the values at the correct address */
498         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
499         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
500       }
501     } else if (!baij->donotstash) {
502       if (roworiented) {
503         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
504       } else {
505         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
506       }
507     }
508   }
509   if (PetscDefined(USE_DEBUG)) {
510     baij->ht_total_ct  += total_ct;
511     baij->ht_insert_ct += insert_ct;
512   }
513   PetscFunctionReturn(0);
514 }
515 
516 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
517 {
518   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
519   PetscBool         roworiented = baij->roworiented;
520   PetscErrorCode    ierr;
521   PetscInt          i,j,ii,jj,row,col;
522   PetscInt          rstart=baij->rstartbs;
523   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
524   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
525   PetscReal         tmp;
526   MatScalar         **HD = baij->hd,*baij_a;
527   const PetscScalar *v_t,*value;
528   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
529 
530   PetscFunctionBegin;
531   if (roworiented) stepval = (n-1)*bs;
532   else stepval = (m-1)*bs;
533 
534   for (i=0; i<m; i++) {
535     if (PetscDefined(USE_DEBUG)) {
536       PetscCheckFalse(im[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]);
537       PetscCheckFalse(im[i] >= baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
538     }
539     row = im[i];
540     v_t = v + i*nbs2;
541     if (row >= rstart && row < rend) {
542       for (j=0; j<n; j++) {
543         col = in[j];
544 
545         /* Look up into the Hash Table */
546         key = row*Nbs+col+1;
547         h1  = HASH(size,key,tmp);
548 
549         idx = h1;
550         if (PetscDefined(USE_DEBUG)) {
551           total_ct++;
552           insert_ct++;
553           if (HT[idx] != key) {
554             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
555             if (idx == size) {
556               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
557               PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
558             }
559           }
560         } else if (HT[idx] != key) {
561           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
562           if (idx == size) {
563             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
564             PetscCheckFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
565           }
566         }
567         baij_a = HD[idx];
568         if (roworiented) {
569           /*value = v + i*(stepval+bs)*bs + j*bs;*/
570           /* value = v + (i*(stepval+bs)+j)*bs; */
571           value = v_t;
572           v_t  += bs;
573           if (addv == ADD_VALUES) {
574             for (ii=0; ii<bs; ii++,value+=stepval) {
575               for (jj=ii; jj<bs2; jj+=bs) {
576                 baij_a[jj] += *value++;
577               }
578             }
579           } else {
580             for (ii=0; ii<bs; ii++,value+=stepval) {
581               for (jj=ii; jj<bs2; jj+=bs) {
582                 baij_a[jj] = *value++;
583               }
584             }
585           }
586         } else {
587           value = v + j*(stepval+bs)*bs + i*bs;
588           if (addv == ADD_VALUES) {
589             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
590               for (jj=0; jj<bs; jj++) {
591                 baij_a[jj] += *value++;
592               }
593             }
594           } else {
595             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
596               for (jj=0; jj<bs; jj++) {
597                 baij_a[jj] = *value++;
598               }
599             }
600           }
601         }
602       }
603     } else {
604       if (!baij->donotstash) {
605         if (roworiented) {
606           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
607         } else {
608           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
609         }
610       }
611     }
612   }
613   if (PetscDefined(USE_DEBUG)) {
614     baij->ht_total_ct  += total_ct;
615     baij->ht_insert_ct += insert_ct;
616   }
617   PetscFunctionReturn(0);
618 }
619 
/*
   MatGetValues_MPIBAIJ - Retrieve values at global POINT indices; only rows
   owned by this process are supported.  Columns in the owned range are read
   from the diagonal block baij->A; other columns are translated through
   colmap/garray and read from baij->B, returning 0.0 for positions outside
   the stored nonzero pattern.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) { /* diagonal (A) block */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else { /* off-diagonal (B) block */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          /* colmap stores local point column + 1; 0 means "not present" */
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* data < 0 or a mismatched garray entry means the block column is not stored locally */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* local point column inside B */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
660 
661 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
662 {
663   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
664   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
665   PetscErrorCode ierr;
666   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
667   PetscReal      sum = 0.0;
668   MatScalar      *v;
669 
670   PetscFunctionBegin;
671   if (baij->size == 1) {
672     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
673   } else {
674     if (type == NORM_FROBENIUS) {
675       v  = amat->a;
676       nz = amat->nz*bs2;
677       for (i=0; i<nz; i++) {
678         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
679       }
680       v  = bmat->a;
681       nz = bmat->nz*bs2;
682       for (i=0; i<nz; i++) {
683         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
684       }
685       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
686       *nrm = PetscSqrtReal(*nrm);
687     } else if (type == NORM_1) { /* max column sum */
688       PetscReal *tmp,*tmp2;
689       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
690       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
691       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
692       v    = amat->a; jj = amat->j;
693       for (i=0; i<amat->nz; i++) {
694         for (j=0; j<bs; j++) {
695           col = bs*(cstart + *jj) + j; /* column index */
696           for (row=0; row<bs; row++) {
697             tmp[col] += PetscAbsScalar(*v);  v++;
698           }
699         }
700         jj++;
701       }
702       v = bmat->a; jj = bmat->j;
703       for (i=0; i<bmat->nz; i++) {
704         for (j=0; j<bs; j++) {
705           col = bs*garray[*jj] + j;
706           for (row=0; row<bs; row++) {
707             tmp[col] += PetscAbsScalar(*v); v++;
708           }
709         }
710         jj++;
711       }
712       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
713       *nrm = 0.0;
714       for (j=0; j<mat->cmap->N; j++) {
715         if (tmp2[j] > *nrm) *nrm = tmp2[j];
716       }
717       ierr = PetscFree(tmp);CHKERRQ(ierr);
718       ierr = PetscFree(tmp2);CHKERRQ(ierr);
719     } else if (type == NORM_INFINITY) { /* max row sum */
720       PetscReal *sums;
721       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
722       sum  = 0.0;
723       for (j=0; j<amat->mbs; j++) {
724         for (row=0; row<bs; row++) sums[row] = 0.0;
725         v  = amat->a + bs2*amat->i[j];
726         nz = amat->i[j+1]-amat->i[j];
727         for (i=0; i<nz; i++) {
728           for (col=0; col<bs; col++) {
729             for (row=0; row<bs; row++) {
730               sums[row] += PetscAbsScalar(*v); v++;
731             }
732           }
733         }
734         v  = bmat->a + bs2*bmat->i[j];
735         nz = bmat->i[j+1]-bmat->i[j];
736         for (i=0; i<nz; i++) {
737           for (col=0; col<bs; col++) {
738             for (row=0; row<bs; row++) {
739               sums[row] += PetscAbsScalar(*v); v++;
740             }
741           }
742         }
743         for (row=0; row<bs; row++) {
744           if (sums[row] > sum) sum = sums[row];
745         }
746       }
747       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
748       ierr = PetscFree(sums);CHKERRQ(ierr);
749     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
750   }
751   PetscFunctionReturn(0);
752 }
753 
/*
  Creates the hash table and fills in its entries.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;       /* ct: total probe collisions, max: longest probe chain */
#endif

  PetscFunctionBegin;
  /* table is built only once; a second call is a no-op until baij->ht is destroyed */
  if (baij->ht) PetscFunctionReturn(0);

  /* table size is the total number of nonzero blocks scaled by factor (load-factor control) */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table (calloc: key 0 marks an empty slot) */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A (diagonal block): insert each nonzero block keyed by its global position */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      /* key is the 1-based linearized global (block row, block col); +1 keeps 0 free as the empty marker */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* linear probing with wrap-around until a free slot is found */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2; /* points at the bs2 scalar values of this block */
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B (off-diagonal block): garray maps local to global block columns */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary: j counts occupied slots; average search = (collisions+hits)/hits */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
845 
846 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
847 {
848   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
849   PetscErrorCode ierr;
850   PetscInt       nstash,reallocs;
851 
852   PetscFunctionBegin;
853   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
854 
855   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
856   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
857   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
858   ierr = PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
859   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
860   ierr = PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
/* Finish assembly: drain both stashes into the local blocks, assemble the
   diagonal (A) and off-diagonal (B) sequential matrices, handle collective
   disassembly, and (on first final assembly) build the multiply machinery. */
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash: receive each message and insert its values */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block holds bs2 scalars, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is false iff any rank has was_assembled == false */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the scatter/lvec used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* first final assembly with the hash-table option: build the table and
     switch to the hash-table based MatSetValues kernels */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow() work space; it is reallocated on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
977 
978 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
979 #include <petscdraw.h>
/* View the parallel matrix via an ASCII, draw, or socket viewer.  The INFO
   formats print per-rank summaries; otherwise the whole matrix is gathered
   onto rank 0 and viewed there.  Collective on the matrix's communicator. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* synchronized per-rank summary of local sizes and nonzero counts */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (rank == 0) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point-row indices of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block at a time (values are column-oriented) */
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps the local block column of B to its global block column */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (rank == 0) {
      /* on rank 0 the gathered matrix's diagonal block holds everything */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1097 
1098 /* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Writes the matrix in PETSc binary format: header, row lengths, column
   indices, then values, each section written collectively.  Within every
   point row the B (off-diagonal) entries with columns left of the diagonal
   block are emitted first, then A, then the remaining B entries, keeping the
   columns of each row in ascending global order. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz);  /* local scalar nonzeros (blocks are stored dense) */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* header[3] becomes the global nonzero count on rank 0 */
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      /* all bs point rows of a block row have the same length */
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      /* B columns left of the diagonal block first (cs/bs is the first diagonal block column;
         NOTE(review): '>' vs '>=' is equivalent here since B never holds diagonal-range columns) */
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      /* then the diagonal-block (A) columns, shifted to global numbering */
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs;
      /* then the remaining B columns, right of the diagonal block */
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values, in the same order as the column indices;
     blocks are column-oriented, so point row k of block jb is B->a[bs*(bs*jb+l)+k] */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1180 
1181 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1182 {
1183   PetscErrorCode ierr;
1184   PetscBool      iascii,isdraw,issocket,isbinary;
1185 
1186   PetscFunctionBegin;
1187   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1188   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1189   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1190   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1191   if (iascii || isdraw || issocket) {
1192     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1193   } else if (isbinary) {
1194     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1195   }
1196   PetscFunctionReturn(0);
1197 }
1198 
/* Destroy the parallel BAIJ matrix: free the stashes, sub-matrices,
   column maps, scatter machinery, hash table, and all composed methods. */
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  /* assembly stashes (scalar and block) */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* diagonal and off-diagonal sequential blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* global-to-local column map, stored either as a table or an array */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* communication machinery for matrix-vector products */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  /* MatGetRow() work space and block work array */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  /* hash table (if the hash-table MatSetValues path was enabled) */
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* clear the type name and all composed methods registered at creation */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1241 
1242 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1243 {
1244   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1245   PetscErrorCode ierr;
1246   PetscInt       nt;
1247 
1248   PetscFunctionBegin;
1249   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1250   PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1251   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1252   PetscCheckFalse(nt != A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1253   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1255   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
/* zz = A*xx + yy.  Same communication/computation overlap as MatMult:
   scatter ghost values of xx while computing the diagonal-block part. */
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* zz = diag(A)*xx + yy while the scatter is in flight */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* zz += offdiag(A)*lvec */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1272 
/* yy = A^T * xx.  The off-diagonal transpose product lands in lvec and is
   scattered back (reverse, additive) into the owning ranks' entries of yy. */
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1288 
/* zz = A^T * xx + yy.  Mirrors MatMultTranspose, with the local part
   accumulating into yy via multtransposeadd. */
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1304 
1305 /*
1306   This only works correctly for square matrices where the subblock A->A is the
1307    diagonal block
1308 */
1309 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1310 {
1311   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   PetscCheckFalse(A->rmap->N != A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1316   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1321 {
1322   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1327   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
/* Return one locally-owned (point) row, merging the rows of the diagonal (A)
   and off-diagonal (B) blocks into global column order.  Must be paired with
   MatRestoreRow_MPIBAIJ(). */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheckFalse(row < brstart || row >= brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  PetscCheckFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* tmp = total number of nonzero blocks in block row i (A part + B part) */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  /* only request from the sub-matrices the arrays the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global block column precedes the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found by the value branch above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* value branch did not run: locate the split point here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1407 
1408 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1409 {
1410   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1411 
1412   PetscFunctionBegin;
1413   PetscCheckFalse(!baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1414   baij->getrowactive = PETSC_FALSE;
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1419 {
1420   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1421   PetscErrorCode ierr;
1422 
1423   PetscFunctionBegin;
1424   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1425   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1430 {
1431   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1432   Mat            A  = a->A,B = a->B;
1433   PetscErrorCode ierr;
1434   PetscLogDouble isend[5],irecv[5];
1435 
1436   PetscFunctionBegin;
1437   info->block_size = (PetscReal)matin->rmap->bs;
1438 
1439   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1440 
1441   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1442   isend[3] = info->memory;  isend[4] = info->mallocs;
1443 
1444   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1445 
1446   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1447   isend[3] += info->memory;  isend[4] += info->mallocs;
1448 
1449   if (flag == MAT_LOCAL) {
1450     info->nz_used      = isend[0];
1451     info->nz_allocated = isend[1];
1452     info->nz_unneeded  = isend[2];
1453     info->memory       = isend[3];
1454     info->mallocs      = isend[4];
1455   } else if (flag == MAT_GLOBAL_MAX) {
1456     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1457 
1458     info->nz_used      = irecv[0];
1459     info->nz_allocated = irecv[1];
1460     info->nz_unneeded  = irecv[2];
1461     info->memory       = irecv[3];
1462     info->mallocs      = irecv[4];
1463   } else if (flag == MAT_GLOBAL_SUM) {
1464     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1465 
1466     info->nz_used      = irecv[0];
1467     info->nz_allocated = irecv[1];
1468     info->nz_unneeded  = irecv[2];
1469     info->memory       = irecv[3];
1470     info->mallocs      = irecv[4];
1471   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1472   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1473   info->fill_ratio_needed = 0;
1474   info->factor_mallocs    = 0;
1475   PetscFunctionReturn(0);
1476 }
1477 
/* Set an option on the parallel matrix; most options are forwarded to the
   sequential diagonal (A) and/or off-diagonal (B) blocks. */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-structure options are forwarded to both blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* also remembered locally; the MatSetValues kernels consult this flag */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* table itself is built lazily at the next final assembly; 1.39 is the
       default table-size factor */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  /* symmetry-type flags only need to reach the diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1525 
/*
   Transposes a parallel BAIJ matrix by inserting each local block, one block
   column at a time, into the (row,col)-swapped position of the target matrix
   and then assembling.  For MAT_INITIAL_MATRIX/MAT_INPLACE_MATRIX a new
   matrix with swapped sizes is created first (preallocation is unknown, so
   only the block size is set); MAT_INPLACE_MATRIX finally merges the result
   back into A via MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part: rvals holds the bs global row indices of block row i,
     which become column indices in the transpose */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert one column of the bs x bs block as a row of the transpose;
           'a' advances by bs scalars (column-major block storage) */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part: column indices are translated through garray
     (compressed off-diagonal numbering -> global block numbering) */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B and destroy the shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1589 
/*
   Computes mat = diag(ll) * mat * diag(rr); either vector may be NULL.
   Left scaling is purely local.  Right scaling of the off-diagonal block
   needs the ghost values of rr, so the scatter is started first and only
   completed after the local work, overlapping communication with
   computation.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;  /* s1: vector local size; s2/s3: local rows/cols of mat */

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local, no communication) */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block on both sides */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1620 
/*
   Zeroes the given global rows of the matrix, optionally placing 'diag' on
   the diagonal.  If both x and b are given, b is fixed up on the zeroed rows
   so that b = diag*x there (keeps prescribed solution values consistent).
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
  PetscInt      *lrows;          /* local indices of the rows this rank owns */
  PetscInt       r, len;
  PetscBool      cong;           /* PETSC_TRUE when row and column layouts match */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    /* fast path: the diagonal entry lies inside l->A, set it there directly */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert the diagonal entries one
       by one (requires the nonzero pattern to be allowed to grow) */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    PetscCheckFalse(((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* insertions above may be off-process for this row map; must re-assemble */
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
1679 
/*
   Zeroes the given global rows AND the matching columns of the matrix,
   optionally placing 'diag' on the diagonal.  If x and b are supplied, b is
   adjusted so that the prescribed values in x remain consistent after
   elimination.  Any rank may list any global row; a PetscSF reduction routes
   each request to the owning rank.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;   /* 1.0 marks a zeroed row; lmask holds ghost copies */
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix: build a mask vector whose ghost
     entries flag which off-process columns must be zeroed */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* also need ghost values of x to fix up b below */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* scalar entries of one row inside the block rows: stride bs apart */
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];   /* move known value to the RHS */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  }
  PetscFunctionReturn(0);
}
1775 
1776 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1777 {
1778   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1779   PetscErrorCode ierr;
1780 
1781   PetscFunctionBegin;
1782   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1787 
1788 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1789 {
1790   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1791   Mat            a,b,c,d;
1792   PetscBool      flg;
1793   PetscErrorCode ierr;
1794 
1795   PetscFunctionBegin;
1796   a = matA->A; b = matA->B;
1797   c = matB->A; d = matB->B;
1798 
1799   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1800   if (flg) {
1801     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1802   }
1803   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1808 {
1809   PetscErrorCode ierr;
1810   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1811   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1812 
1813   PetscFunctionBegin;
1814   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1815   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1816     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1817   } else {
1818     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1819     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1820   }
1821   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1826 {
1827   PetscErrorCode ierr;
1828 
1829   PetscFunctionBegin;
1830   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1835 {
1836   PetscErrorCode ierr;
1837   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1838   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1839   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1840 
1841   PetscFunctionBegin;
1842   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
/*
   Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the raw value arrays of
   the diagonal and off-diagonal blocks are combined with BLAS axpy calls;
   with SUBSET_NONZERO_PATTERN the generic kernel is used; otherwise a new
   matrix with the union pattern is preallocated, filled, and merged back
   into Y via MatHeaderMerge().
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;   /* scalars per block */

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: value arrays line up entry-for-entry */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* unknown relationship: build a fresh matrix preallocated for the union
       of both patterns, compute into it, then take over Y's header */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1889 
/* Replaces every entry by its complex conjugate; a no-op when PETSc is
   built with real scalars (the preprocessor removes the work entirely). */
PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  /* conjugate both local blocks; no communication needed */
  ierr = MatConjugate_SeqBAIJ(a->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqBAIJ(a->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}
1904 
1905 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1906 {
1907   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1912   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1917 {
1918   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1919   PetscErrorCode ierr;
1920 
1921   PetscFunctionBegin;
1922   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1923   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1928 {
1929   PetscErrorCode ierr;
1930   IS             iscol_local;
1931   PetscInt       csize;
1932 
1933   PetscFunctionBegin;
1934   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1935   if (call == MAT_REUSE_MATRIX) {
1936     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1937     PetscCheckFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1938   } else {
1939     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1940   }
1941   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1942   if (call == MAT_INITIAL_MATRIX) {
1943     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1944     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 /*
1950   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1951   in local and then by concatenating the local matrices the end result.
1952   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1953   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1954 */
/*
   Workhorse for MatCreateSubMatrix_MPIBAIJ()/MatPermute_MPIBAIJ(): first
   extracts the requested rows/columns into a sequential matrix per rank
   (Mreuse), then assembles the final parallel matrix from those pieces.
   The sequential piece is stashed on the result under "SubMatrix" for reuse.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  /* Convert the scalar index sets to block index sets.  This compression and
     re-expansion should ideally be avoided: it does not flag errors and may
     alter the indices, hence potentially buggy. */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    PetscCheckFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* convert scalar sizes to block sizes */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    /* both preallocation routines are called because this code serves both
       BAIJ and SBAIJ matrices; the one not matching the type is a no-op */
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    /* NOTE(review): ml is a scalar local size while m was divided by bs
       above -- looks like this check only matches for bs == 1; confirm */
    PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* insert the sequential piece block row by block row */
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2079 
2080 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2081 {
2082   MPI_Comm       comm,pcomm;
2083   PetscInt       clocal_size,nrows;
2084   const PetscInt *rows;
2085   PetscMPIInt    size;
2086   IS             crowp,lcolp;
2087   PetscErrorCode ierr;
2088 
2089   PetscFunctionBegin;
2090   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2091   /* make a collective version of 'rowp' */
2092   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2093   if (pcomm==comm) {
2094     crowp = rowp;
2095   } else {
2096     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2097     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2098     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2099     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2100   }
2101   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2102   /* make a local version of 'colp' */
2103   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2104   ierr = MPI_Comm_size(pcomm,&size);CHKERRMPI(ierr);
2105   if (size==1) {
2106     lcolp = colp;
2107   } else {
2108     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2109   }
2110   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2111   /* now we just get the submatrix */
2112   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2113   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2114   /* clean up */
2115   if (pcomm!=comm) {
2116     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2117   }
2118   if (size>1) {
2119     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2125 {
2126   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2127   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2128 
2129   PetscFunctionBegin;
2130   if (nghosts) *nghosts = B->nbs;
2131   if (ghosts) *ghosts = baij->garray;
2132   PetscFunctionReturn(0);
2133 }
2134 
/*
   Builds, on every rank, a sequential AIJ matrix holding the *block* nonzero
   structure of the whole parallel matrix (one AIJ entry per bs x bs block;
   numerical values are never set).  Row lengths and column indices are
   shared with two in-place MPI_Allgatherv calls.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  /* local block-row lengths = diagonal-block row length + off-diagonal-block row length */
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;  /* second half of the same allocation */
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
  /* MPI_IN_PLACE: each rank contributes its own slice of lens[] */
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];  /* write directly into B's j array */
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  /* merge each block row so that global column indices come out sorted:
     below-diagonal off-diagonal entries, then diagonal block, then the rest */
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  PetscCheckFalse(cnt != sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information from the parallel matrix */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2244 
/*
   MatSOR_MPIBAIJ - SOR/Gauss-Seidel relaxation for MPIBAIJ matrices.

   Only the SOR_LOCAL_* sweep variants (and SOR_APPLY_UPPER) are supported in
   parallel.  Each outer iteration scatters the current solution into the
   ghost vector, forms the modified right-hand side bb1 = bb - B*x (removing
   the off-process coupling), and then applies the sequential SOR kernel of
   the diagonal block A.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = NULL;   /* workspace rhs bb - B*x; allocated only when needed */

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application: delegate directly to the diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* Note: ~ binds tighter than &, so this reads (~flag) & SOR_ZERO_INITIAL_GUESS,
     i.e. "the zero-initial-guess bit is NOT set".  bb1 is needed whenever the
     initial guess is nonzero or more than one outer iteration is performed. */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with x = 0 there is no off-process contribution, so the first
         iteration can use bb directly */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x (computed as bb + B*(-x)) */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2315 
2316 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions)
2317 {
2318   PetscErrorCode ierr;
2319   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2320   PetscInt       m,N,i,*garray = aij->garray;
2321   PetscInt       ib,jb,bs = A->rmap->bs;
2322   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2323   MatScalar      *a_val = a_aij->a;
2324   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2325   MatScalar      *b_val = b_aij->a;
2326   PetscReal      *work;
2327 
2328   PetscFunctionBegin;
2329   ierr = MatGetSize(A,&m,&N);CHKERRQ(ierr);
2330   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2331   if (type == NORM_2) {
2332     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2333       for (jb=0; jb<bs; jb++) {
2334         for (ib=0; ib<bs; ib++) {
2335           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2336           a_val++;
2337         }
2338       }
2339     }
2340     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2341       for (jb=0; jb<bs; jb++) {
2342         for (ib=0; ib<bs; ib++) {
2343           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2344           b_val++;
2345         }
2346       }
2347     }
2348   } else if (type == NORM_1) {
2349     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2350       for (jb=0; jb<bs; jb++) {
2351         for (ib=0; ib<bs; ib++) {
2352           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2353           a_val++;
2354         }
2355       }
2356     }
2357     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2358       for (jb=0; jb<bs; jb++) {
2359        for (ib=0; ib<bs; ib++) {
2360           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2361           b_val++;
2362         }
2363       }
2364     }
2365   } else if (type == NORM_INFINITY) {
2366     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2367       for (jb=0; jb<bs; jb++) {
2368         for (ib=0; ib<bs; ib++) {
2369           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2370           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2371           a_val++;
2372         }
2373       }
2374     }
2375     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2376       for (jb=0; jb<bs; jb++) {
2377         for (ib=0; ib<bs; ib++) {
2378           int col = garray[b_aij->j[i]] * bs + jb;
2379           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2380           b_val++;
2381         }
2382       }
2383     }
2384   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2385     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2386       for (jb=0; jb<bs; jb++) {
2387         for (ib=0; ib<bs; ib++) {
2388           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2389           a_val++;
2390         }
2391       }
2392     }
2393     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2394       for (jb=0; jb<bs; jb++) {
2395        for (ib=0; ib<bs; ib++) {
2396           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2397           b_val++;
2398         }
2399       }
2400     }
2401   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2402     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2403       for (jb=0; jb<bs; jb++) {
2404         for (ib=0; ib<bs; ib++) {
2405           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2406           a_val++;
2407         }
2408       }
2409     }
2410     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2411       for (jb=0; jb<bs; jb++) {
2412        for (ib=0; ib<bs; ib++) {
2413           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2414           b_val++;
2415         }
2416       }
2417     }
2418   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
2419   if (type == NORM_INFINITY) {
2420     ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2421   } else {
2422     ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2423   }
2424   ierr = PetscFree(work);CHKERRQ(ierr);
2425   if (type == NORM_2) {
2426     for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2427   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2428     for (i=0; i<N; i++) reductions[i] /= m;
2429   }
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2434 {
2435   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2436   PetscErrorCode ierr;
2437 
2438   PetscFunctionBegin;
2439   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2440   A->factorerrortype             = a->A->factorerrortype;
2441   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2442   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2447 {
2448   PetscErrorCode ierr;
2449   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2450   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2451 
2452   PetscFunctionBegin;
2453   if (!Y->preallocated) {
2454     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2455   } else if (!aij->nz) {
2456     PetscInt nonew = aij->nonew;
2457     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2458     aij->nonew = nonew;
2459   }
2460   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2465 {
2466   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2467   PetscErrorCode ierr;
2468 
2469   PetscFunctionBegin;
2470   PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2471   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2472   if (d) {
2473     PetscInt rstart;
2474     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2475     *d += rstart/A->rmap->bs;
2476 
2477   }
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2482 {
2483   PetscFunctionBegin;
2484   *a = ((Mat_MPIBAIJ*)A->data)->A;
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 /* -------------------------------------------------------------------*/
2489 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2490                                        MatGetRow_MPIBAIJ,
2491                                        MatRestoreRow_MPIBAIJ,
2492                                        MatMult_MPIBAIJ,
2493                                 /* 4*/ MatMultAdd_MPIBAIJ,
2494                                        MatMultTranspose_MPIBAIJ,
2495                                        MatMultTransposeAdd_MPIBAIJ,
2496                                        NULL,
2497                                        NULL,
2498                                        NULL,
2499                                 /*10*/ NULL,
2500                                        NULL,
2501                                        NULL,
2502                                        MatSOR_MPIBAIJ,
2503                                        MatTranspose_MPIBAIJ,
2504                                 /*15*/ MatGetInfo_MPIBAIJ,
2505                                        MatEqual_MPIBAIJ,
2506                                        MatGetDiagonal_MPIBAIJ,
2507                                        MatDiagonalScale_MPIBAIJ,
2508                                        MatNorm_MPIBAIJ,
2509                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2510                                        MatAssemblyEnd_MPIBAIJ,
2511                                        MatSetOption_MPIBAIJ,
2512                                        MatZeroEntries_MPIBAIJ,
2513                                 /*24*/ MatZeroRows_MPIBAIJ,
2514                                        NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        NULL,
2518                                 /*29*/ MatSetUp_MPIBAIJ,
2519                                        NULL,
2520                                        NULL,
2521                                        MatGetDiagonalBlock_MPIBAIJ,
2522                                        NULL,
2523                                 /*34*/ MatDuplicate_MPIBAIJ,
2524                                        NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        NULL,
2528                                 /*39*/ MatAXPY_MPIBAIJ,
2529                                        MatCreateSubMatrices_MPIBAIJ,
2530                                        MatIncreaseOverlap_MPIBAIJ,
2531                                        MatGetValues_MPIBAIJ,
2532                                        MatCopy_MPIBAIJ,
2533                                 /*44*/ NULL,
2534                                        MatScale_MPIBAIJ,
2535                                        MatShift_MPIBAIJ,
2536                                        NULL,
2537                                        MatZeroRowsColumns_MPIBAIJ,
2538                                 /*49*/ NULL,
2539                                        NULL,
2540                                        NULL,
2541                                        NULL,
2542                                        NULL,
2543                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2544                                        NULL,
2545                                        MatSetUnfactored_MPIBAIJ,
2546                                        MatPermute_MPIBAIJ,
2547                                        MatSetValuesBlocked_MPIBAIJ,
2548                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2549                                        MatDestroy_MPIBAIJ,
2550                                        MatView_MPIBAIJ,
2551                                        NULL,
2552                                        NULL,
2553                                 /*64*/ NULL,
2554                                        NULL,
2555                                        NULL,
2556                                        NULL,
2557                                        NULL,
2558                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2559                                        NULL,
2560                                        NULL,
2561                                        NULL,
2562                                        NULL,
2563                                 /*74*/ NULL,
2564                                        MatFDColoringApply_BAIJ,
2565                                        NULL,
2566                                        NULL,
2567                                        NULL,
2568                                 /*79*/ NULL,
2569                                        NULL,
2570                                        NULL,
2571                                        NULL,
2572                                        MatLoad_MPIBAIJ,
2573                                 /*84*/ NULL,
2574                                        NULL,
2575                                        NULL,
2576                                        NULL,
2577                                        NULL,
2578                                 /*89*/ NULL,
2579                                        NULL,
2580                                        NULL,
2581                                        NULL,
2582                                        NULL,
2583                                 /*94*/ NULL,
2584                                        NULL,
2585                                        NULL,
2586                                        NULL,
2587                                        NULL,
2588                                 /*99*/ NULL,
2589                                        NULL,
2590                                        NULL,
2591                                        MatConjugate_MPIBAIJ,
2592                                        NULL,
2593                                 /*104*/NULL,
2594                                        MatRealPart_MPIBAIJ,
2595                                        MatImaginaryPart_MPIBAIJ,
2596                                        NULL,
2597                                        NULL,
2598                                 /*109*/NULL,
2599                                        NULL,
2600                                        NULL,
2601                                        NULL,
2602                                        MatMissingDiagonal_MPIBAIJ,
2603                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2604                                        NULL,
2605                                        MatGetGhosts_MPIBAIJ,
2606                                        NULL,
2607                                        NULL,
2608                                 /*119*/NULL,
2609                                        NULL,
2610                                        NULL,
2611                                        NULL,
2612                                        MatGetMultiProcBlock_MPIBAIJ,
2613                                 /*124*/NULL,
2614                                        MatGetColumnReductions_MPIBAIJ,
2615                                        MatInvertBlockDiagonal_MPIBAIJ,
2616                                        NULL,
2617                                        NULL,
2618                                /*129*/ NULL,
2619                                        NULL,
2620                                        NULL,
2621                                        NULL,
2622                                        NULL,
2623                                /*134*/ NULL,
2624                                        NULL,
2625                                        NULL,
2626                                        NULL,
2627                                        NULL,
2628                                /*139*/ MatSetBlockSizes_Default,
2629                                        NULL,
2630                                        NULL,
2631                                        MatFDColoringSetUp_MPIXAIJ,
2632                                        NULL,
2633                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2634                                        NULL,
2635                                        NULL,
2636                                        NULL
2637 };
2638 
2639 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2640 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2641 
/*
   MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Preallocates and (optionally)
   fills an MPIBAIJ matrix from local block CSR arrays.

   First pass counts, per local block row, the number of blocks falling in
   the diagonal vs off-diagonal part; after preallocating, a second pass
   inserts the blocks with MatSetValuesBlocked.  ii/jj use block (not point)
   indices; V, if given, holds bs*bs values per block.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
  const PetscInt *JJ    =NULL;
  PetscScalar    *values=NULL;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all sizes below are in block units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  PetscCheckFalse(ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    /* split the row's blocks between diagonal and off-diagonal parts */
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros to lock in the nonzero pattern */
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* all entries are local by construction, so skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2713 
2714 /*@C
2715    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2716 
2717    Collective
2718 
2719    Input Parameters:
2720 +  B - the matrix
2721 .  bs - the block size
2722 .  i - the indices into j for the start of each local row (starts with zero)
2723 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2724 -  v - optional values in the matrix
2725 
2726    Level: advanced
2727 
2728    Notes:
2729     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2730    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2731    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2732    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2733    block column and the second index is over columns within a block.
2734 
2735    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2736 
2737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2738 @*/
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation if one is registered; a no-op otherwise */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2750 
2751 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2752 {
2753   Mat_MPIBAIJ    *b;
2754   PetscErrorCode ierr;
2755   PetscInt       i;
2756   PetscMPIInt    size;
2757 
2758   PetscFunctionBegin;
2759   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2760   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2761   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2762   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2763 
2764   if (d_nnz) {
2765     for (i=0; i<B->rmap->n/bs; i++) {
2766       PetscCheckFalse(d_nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]);
2767     }
2768   }
2769   if (o_nnz) {
2770     for (i=0; i<B->rmap->n/bs; i++) {
2771       PetscCheckFalse(o_nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]);
2772     }
2773   }
2774 
2775   b      = (Mat_MPIBAIJ*)B->data;
2776   b->bs2 = bs*bs;
2777   b->mbs = B->rmap->n/bs;
2778   b->nbs = B->cmap->n/bs;
2779   b->Mbs = B->rmap->N/bs;
2780   b->Nbs = B->cmap->N/bs;
2781 
2782   for (i=0; i<=b->size; i++) {
2783     b->rangebs[i] = B->rmap->range[i]/bs;
2784   }
2785   b->rstartbs = B->rmap->rstart/bs;
2786   b->rendbs   = B->rmap->rend/bs;
2787   b->cstartbs = B->cmap->rstart/bs;
2788   b->cendbs   = B->cmap->rend/bs;
2789 
2790 #if defined(PETSC_USE_CTABLE)
2791   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2792 #else
2793   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2794 #endif
2795   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2796   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2797   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2798 
2799   /* Because the B will have been resized we simply destroy it and create a new one each time */
2800   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2801   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2802   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2803   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2804   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2805   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2806 
2807   if (!B->preallocated) {
2808     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2809     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2810     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2811     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2812     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2813   }
2814 
2815   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2816   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2817   B->preallocated  = PETSC_TRUE;
2818   B->was_assembled = PETSC_FALSE;
2819   B->assembled     = PETSC_FALSE;
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2824 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2825 
/*
   MatConvert_MPIBAIJ_MPIAdj - Builds the block-level adjacency graph of an
   MPIBAIJ matrix (one vertex per block row), dropping diagonal entries.

   The column lists are emitted in sorted global order by interleaving the
   off-diagonal blocks that precede the local range, then the local
   (diagonal-part) blocks, then the remaining off-diagonal blocks.
   Ownership of ii and jj passes to the created MPIAdj matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: row pointer array, counting all blocks except diagonal ones */
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    PetscCheckFalse((id[i+1] - id[i]) < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]);
    PetscCheckFalse((io[i+1] - io[i]) < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* second pass: fill column indices in increasing global order */
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    /* off-diagonal blocks whose global column precedes the local range */
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* local blocks, skipping the diagonal entry */
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    /* remaining off-diagonal blocks beyond the local range */
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2865 
2866 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2867 
2868 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2869 
/*
   MatConvert_MPIBAIJ_MPIAIJ - Converts an assembled MPIBAIJ matrix to
   MPIAIJ by converting its two SeqBAIJ parts to SeqAIJ.

   For MAT_INITIAL_MATRIX/MAT_INPLACE_MATRIX the source is temporarily
   disassembled (so its off-diagonal part uses global column indices) before
   conversion, then reassembled.  MAT_INPLACE_MATRIX replaces A's header
   with the new matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  PetscCheckFalse(!A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create an empty MPIAIJ shell; its A and B parts are filled below */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    /* disassemble so a->B carries global column indices during conversion */
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    /* restore A to its assembled state */
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2914 
2915 /*MC
2916    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2917 
2918    Options Database Keys:
2919 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2920 . -mat_block_size <bs> - set the blocksize used to store the matrix
2921 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2922 - -mat_use_hash_table <fact>
2923 
2924    Level: beginner
2925 
2926    Notes:
    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2929 
2930 .seealso: MatCreateBAIJ
2931 M*/
2932 
2933 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2934 
2935 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2936 {
2937   Mat_MPIBAIJ    *b;
2938   PetscErrorCode ierr;
2939   PetscBool      flg = PETSC_FALSE;
2940 
2941   PetscFunctionBegin;
2942   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2943   B->data = (void*)b;
2944 
2945   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2946   B->assembled = PETSC_FALSE;
2947 
2948   B->insertmode = NOT_SET_VALUES;
2949   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
2950   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);
2951 
2952   /* build local table of row and column ownerships */
2953   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
2954 
2955   /* build cache for off array entries formed */
2956   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2957 
2958   b->donotstash  = PETSC_FALSE;
2959   b->colmap      = NULL;
2960   b->garray      = NULL;
2961   b->roworiented = PETSC_TRUE;
2962 
2963   /* stuff used in block assembly */
2964   b->barray = NULL;
2965 
2966   /* stuff used for matrix vector multiply */
2967   b->lvec  = NULL;
2968   b->Mvctx = NULL;
2969 
2970   /* stuff for MatGetRow() */
2971   b->rowindices   = NULL;
2972   b->rowvalues    = NULL;
2973   b->getrowactive = PETSC_FALSE;
2974 
2975   /* hash table stuff */
2976   b->ht           = NULL;
2977   b->hd           = NULL;
2978   b->ht_size      = 0;
2979   b->ht_flag      = PETSC_FALSE;
2980   b->ht_fact      = 0;
2981   b->ht_total_ct  = 0;
2982   b->ht_insert_ct = 0;
2983 
2984   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2985   b->ijonly = PETSC_FALSE;
2986 
2987   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2988   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
2989   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
2990 #if defined(PETSC_HAVE_HYPRE)
2991   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
2992 #endif
2993   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2994   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2995   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2996   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2997   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2998   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2999   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3000   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3001 
3002   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3003   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
3004   if (flg) {
3005     PetscReal fact = 1.39;
3006     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3007     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3008     if (fact <= 1.0) fact = 1.39;
3009     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3010     ierr = PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact);CHKERRQ(ierr);
3011   }
3012   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 /*MC
3017    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3018 
3019    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3020    and MATMPIBAIJ otherwise.
3021 
3022    Options Database Keys:
3023 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3024 
3025   Level: beginner
3026 
3027 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3028 M*/
3029 
3030 /*@C
3031    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3032    (block compressed row).  For good matrix assembly performance
3033    the user should preallocate the matrix storage by setting the parameters
3034    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3035    performance can be increased by more than a factor of 50.
3036 
3037    Collective on Mat
3038 
3039    Input Parameters:
3040 +  B - the matrix
3041 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3042           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3043 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3044            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
3049 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3050            submatrix (same for all local rows).
3051 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3052            off-diagonal portion of the local submatrix (possibly different for
3053            each block row) or NULL.
3054 
3055    If the *_nnz parameter is given then the *_nz parameter is ignored
3056 
3057    Options Database Keys:
3058 +   -mat_block_size - size of the blocks to use
3059 -   -mat_use_hash_table <fact>
3060 
3061    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3064 
3065    Storage Information:
3066    For a square global matrix we define each processor's diagonal portion
3067    to be its local rows and the corresponding columns (a square submatrix);
3068    each processor's off-diagonal portion encompasses the remainder of the
3069    local matrix (a rectangular submatrix).
3070 
3071    The user can specify preallocated storage for the diagonal part of
3072    the local submatrix with either d_nz or d_nnz (not both).  Set
3073    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3074    memory allocation.  Likewise, specify preallocated storage for the
3075    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3076 
3077    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3078    the figure below we depict these three local rows and all columns (0-11).
3079 
3080 .vb
3081            0 1 2 3 4 5 6 7 8 9 10 11
3082           --------------------------
3083    row 3  |o o o d d d o o o o  o  o
3084    row 4  |o o o d d d o o o o  o  o
3085    row 5  |o o o d d d o o o o  o  o
3086           --------------------------
3087 .ve
3088 
3089    Thus, any entries in the d locations are stored in the d (diagonal)
3090    submatrix, and any entries in the o locations are stored in the
3091    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3092    stored simply in the MATSEQBAIJ format for compressed row storage.
3093 
3094    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3095    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3096    In general, for PDE problems in which most nonzeros are near the diagonal,
3097    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3098    or you will get TERRIBLE performance; see the users' manual chapter on
3099    matrices.
3100 
3101    You can call MatGetInfo() to get information on how effective the preallocation was;
3102    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3103    You can also run with the option -info and look for messages with the string
3104    malloc in them to see if additional memory allocation was needed.
3105 
3106    Level: intermediate
3107 
3108 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3109 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate the Mat handle and require bs to be collective across the communicator */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to MatMPIBAIJSetPreallocation_MPIBAIJ() when the matrix type composed it
     (see MatCreate_MPIBAIJ()); silently a no-op for types that did not */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3121 
3122 /*@C
3123    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3124    (block compressed row).  For good matrix assembly performance
3125    the user should preallocate the matrix storage by setting the parameters
3126    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3127    performance can be increased by more than a factor of 50.
3128 
3129    Collective
3130 
3131    Input Parameters:
3132 +  comm - MPI communicator
3133 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3134           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3135 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3136            This value should be the same as the local size used in creating the
3137            y vector for the matrix-vector product y = Ax.
3138 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3139            This value should be the same as the local size used in creating the
3140            x vector for the matrix-vector product y = Ax.
3141 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3142 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3143 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3144            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3149 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3150            submatrix (same for all local rows).
3151 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3152            off-diagonal portion of the local submatrix (possibly different for
3153            each block row) or NULL.
3154 
3155    Output Parameter:
3156 .  A - the matrix
3157 
3158    Options Database Keys:
3159 +   -mat_block_size - size of the blocks to use
3160 -   -mat_use_hash_table <fact>
3161 
3162    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3163    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3164    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3165 
3166    Notes:
3167    If the *_nnz parameter is given then the *_nz parameter is ignored
3168 
   A nonzero block is any block that has 1 or more nonzeros in it
3170 
3171    The user MUST specify either the local or global matrix dimensions
3172    (possibly both).
3173 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3176 
3177    Storage Information:
3178    For a square global matrix we define each processor's diagonal portion
3179    to be its local rows and the corresponding columns (a square submatrix);
3180    each processor's off-diagonal portion encompasses the remainder of the
3181    local matrix (a rectangular submatrix).
3182 
3183    The user can specify preallocated storage for the diagonal part of
3184    the local submatrix with either d_nz or d_nnz (not both).  Set
3185    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3186    memory allocation.  Likewise, specify preallocated storage for the
3187    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3188 
3189    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3190    the figure below we depict these three local rows and all columns (0-11).
3191 
3192 .vb
3193            0 1 2 3 4 5 6 7 8 9 10 11
3194           --------------------------
3195    row 3  |o o o d d d o o o o  o  o
3196    row 4  |o o o d d d o o o o  o  o
3197    row 5  |o o o d d d o o o o  o  o
3198           --------------------------
3199 .ve
3200 
3201    Thus, any entries in the d locations are stored in the d (diagonal)
3202    submatrix, and any entries in the o locations are stored in the
3203    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3204    stored simply in the MATSEQBAIJ format for compressed row storage.
3205 
3206    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3207    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3208    In general, for PDE problems in which most nonzeros are near the diagonal,
3209    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3210    or you will get TERRIBLE performance; see the users' manual chapter on
3211    matrices.
3212 
3213    Level: intermediate
3214 
3215 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3216 @*/
3217 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3218 {
3219   PetscErrorCode ierr;
3220   PetscMPIInt    size;
3221 
3222   PetscFunctionBegin;
3223   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3224   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3225   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3226   if (size > 1) {
3227     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3228     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3229   } else {
3230     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3231     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3232   }
3233   PetscFunctionReturn(0);
3234 }
3235 
/* Duplicate an MPIBAIJ matrix: create a new Mat of the same type and sizes, copy all
   implementation state, and duplicate the A (diagonal) and B (off-diagonal) blocks
   per cpvalues. The result is marked assembled and immediately usable. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* the duplicate skips the normal preallocation/assembly path, so flag it done */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy cached block counts (bs2 = bs*bs; mbs/nbs local, Mbs/Nbs global block dims) */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs; /* NOTE(review): rmap is replaced by PetscLayoutReference() just below, so this looks redundant -- confirm */
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the original's layouts rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance scratch state starts empty in the duplicate */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration, reset the counters and storage */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* rangebs was allocated by MatCreate_MPIBAIJ() for the new matrix; fill it from the original */
  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;

  /* garray maps local off-diagonal block columns to global block columns; copy when present */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = NULL;

  /* fresh block stash; the ghost vector and scatter are duplicated from the original */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* duplicate the sequential diagonal and off-diagonal blocks honoring cpvalues */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3318 
/* Used for both MPIBAIJ and MPISBAIJ matrices.
   Reads a matrix in PETSc binary (point-wise AIJ) format from the viewer, counts
   block nonzeros to preallocate, then inserts the values row by row. Collective. */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
  PetscInt       *rowidxs,*colidxs,rs,cs,ce;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header: classid, global rows, global cols, total nonzeros */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set local sizes if not set already (square matrices inherit the other map's local size) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);CHKERRQ(ierr);
  mbs = m/bs; nbs = n/bs; /* local block-row and block-column counts */

  /* read in row lengths and build row indices (CSR-style prefix sum into rowidxs) */
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: local row lengths must sum to the file's nonzero count */
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
  PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);

  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij,done;
    PetscInt   *d_nnz,*o_nnz;

    ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
    ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
    /* for each block row, count distinct block columns touched by its bs point rows */
    for (i=0; i<mbs; i++) {
      ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
      ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
      for (k=0; k<bs; k++) {
        PetscInt row = bs*i + k;
        for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
          PetscInt col = colidxs[j];
          /* SBAIJ stores only the upper triangle, so skip entries below the diagonal */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              /* diagonal portion: bit set dedups block columns within this block row */
              if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
            } else {
              /* off-diagonal portion: hash set dedups global block columns */
              ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
    ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
    /* both calls are "try" dispatches; presumably only the one matching the matrix type acts -- see PetscTryMethod usage in MatMPIBAIJSetPreallocation() */
    ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
  }

  /* store matrix values, one owned point row at a time */
  for (i=0; i<m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
    ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3417 
3418 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3419 {
3420   PetscErrorCode ierr;
3421   PetscBool      isbinary;
3422 
3423   PetscFunctionBegin;
3424   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3425   PetscCheckFalse(!isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3426   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3427   PetscFunctionReturn(0);
3428 }
3429 
3430 /*@
3431    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3432 
3433    Input Parameters:
3434 +  mat  - the matrix
3435 -  fact - factor
3436 
3437    Not Collective, each process can use a different factor
3438 
3439    Level: advanced
3440 
3441   Notes:
3442    This can also be set by the command line option: -mat_use_hash_table <fact>
3443 
3444 .seealso: MatSetOption()
3445 @*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to MatSetHashTableFactor_MPIBAIJ() when composed on this type
     (see MatCreate_MPIBAIJ()); silently a no-op for other matrix types */
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3454 
3455 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3456 {
3457   Mat_MPIBAIJ *baij;
3458 
3459   PetscFunctionBegin;
3460   baij          = (Mat_MPIBAIJ*)mat->data;
3461   baij->ht_fact = fact;
3462   PetscFunctionReturn(0);
3463 }
3464 
3465 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3466 {
3467   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3468   PetscBool      flg;
3469   PetscErrorCode ierr;
3470 
3471   PetscFunctionBegin;
3472   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3473   PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3474   if (Ad)     *Ad     = a->A;
3475   if (Ao)     *Ao     = a->B;
3476   if (colmap) *colmap = a->garray;
3477   PetscFunctionReturn(0);
3478 }
3479 
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
3483 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3484 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3485 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3486 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3487 #endif
3488 
3489 /*@C
3490   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3491 
3492   Collective on Mat
3493 
3494   Input Parameters:
3495 + mat - the matrix
3496 . min - number of input rows
3497 . im - input rows
3498 . nin - number of input columns
3499 . in - input columns
3500 . v - numerical values input
3501 - addvin - INSERT_VALUES or ADD_VALUES
3502 
3503   Notes:
3504     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3505 
3506   Level: advanced
3507 
3508 .seealso:   MatSetValuesBlocked()
3509 @*/
3510 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3511 {
3512   /* convert input arguments to C version */
3513   Mat        mat  = *matin;
3514   PetscInt   m    = *min, n = *nin;
3515   InsertMode addv = *addvin;
3516 
3517   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3518   const MatScalar *value;
3519   MatScalar       *barray     = baij->barray;
3520   PetscBool       roworiented = baij->roworiented;
3521   PetscErrorCode  ierr;
3522   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3523   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3524   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3525 
3526   PetscFunctionBegin;
3527   /* tasks normally handled by MatSetValuesBlocked() */
3528   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3529   else PetscCheckFalse(mat->insertmode != addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3530   PetscCheckFalse(mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3531   if (mat->assembled) {
3532     mat->was_assembled = PETSC_TRUE;
3533     mat->assembled     = PETSC_FALSE;
3534   }
3535   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3536 
3537   if (!barray) {
3538     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3539     baij->barray = barray;
3540   }
3541 
3542   if (roworiented) stepval = (n-1)*bs;
3543   else stepval = (m-1)*bs;
3544 
3545   for (i=0; i<m; i++) {
3546     if (im[i] < 0) continue;
3547     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
3548     if (im[i] >= rstart && im[i] < rend) {
3549       row = im[i] - rstart;
3550       for (j=0; j<n; j++) {
3551         /* If NumCol = 1 then a copy is not required */
3552         if ((roworiented) && (n == 1)) {
3553           barray = (MatScalar*)v + i*bs2;
3554         } else if ((!roworiented) && (m == 1)) {
3555           barray = (MatScalar*)v + j*bs2;
3556         } else { /* Here a copy is required */
3557           if (roworiented) {
3558             value = v + i*(stepval+bs)*bs + j*bs;
3559           } else {
3560             value = v + j*(stepval+bs)*bs + i*bs;
3561           }
3562           for (ii=0; ii<bs; ii++,value+=stepval) {
3563             for (jj=0; jj<bs; jj++) {
3564               *barray++ = *value++;
3565             }
3566           }
3567           barray -=bs2;
3568         }
3569 
3570         if (in[j] >= cstart && in[j] < cend) {
3571           col  = in[j] - cstart;
3572           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3573         } else if (in[j] < 0) continue;
3574         else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
3575         else {
3576           if (mat->was_assembled) {
3577             if (!baij->colmap) {
3578               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3579             }
3580 
3581 #if defined(PETSC_USE_DEBUG)
3582 #if defined(PETSC_USE_CTABLE)
3583             { PetscInt data;
3584               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3585               PetscCheckFalse((data - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3586             }
3587 #else
3588             PetscCheckFalse((baij->colmap[in[j]] - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3589 #endif
3590 #endif
3591 #if defined(PETSC_USE_CTABLE)
3592             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3593             col  = (col - 1)/bs;
3594 #else
3595             col = (baij->colmap[in[j]] - 1)/bs;
3596 #endif
3597             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3598               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3599               col  =  in[j];
3600             }
3601           } else col = in[j];
3602           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3603         }
3604       }
3605     } else {
3606       if (!baij->donotstash) {
3607         if (roworiented) {
3608           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3609         } else {
3610           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3611         }
3612       }
3613     }
3614   }
3615 
3616   /* task normally handled by MatSetValuesBlocked() */
3617   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3618   PetscFunctionReturn(0);
3619 }
3620 
3621 /*@
3622      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3623          CSR format the local rows.
3624 
3625    Collective
3626 
3627    Input Parameters:
3628 +  comm - MPI communicator
3629 .  bs - the block size, only a block size of 1 is supported
3630 .  m - number of local rows (Cannot be PETSC_DECIDE)
3631 .  n - This value should be the same as the local size used in creating the
3632        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3633        calculated if N is given) For square matrices n is almost always m.
3634 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3635 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + the number of block elements in block row "row" of the matrix
3637 .   j - column indices
3638 -   a - matrix values
3639 
3640    Output Parameter:
3641 .   mat - the matrix
3642 
3643    Level: intermediate
3644 
3645    Notes:
3646        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3647      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3648      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3649 
     The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
     the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
     block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
     with column-major ordering within blocks.
3654 
3655        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3656 
3657 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3658           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3659 @*/
3660 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3661 {
3662   PetscErrorCode ierr;
3663 
3664   PetscFunctionBegin;
3665   PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3666   PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3667   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3668   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3669   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3670   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3671   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3672   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3673   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3674   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3675   PetscFunctionReturn(0);
3676 }
3677 
3678 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3679 {
3680   PetscErrorCode ierr;
3681   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3682   PetscInt       *indx;
3683   PetscScalar    *values;
3684 
3685   PetscFunctionBegin;
3686   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3687   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3688     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3689     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3690     PetscInt       *bindx,rmax=a->rmax,j;
3691     PetscMPIInt    rank,size;
3692 
3693     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3694     mbs = m/bs; Nbs = N/cbs;
3695     if (n == PETSC_DECIDE) {
3696       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
3697     }
3698     nbs = n/cbs;
3699 
3700     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3701     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3702 
3703     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3704     ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr);
3705     if (rank == size-1) {
3706       /* Check sum(nbs) = Nbs */
3707       PetscCheckFalse(__end != Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
3708     }
3709 
3710     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3711     for (i=0; i<mbs; i++) {
3712       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3713       nnz = nnz/bs;
3714       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3715       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3716       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3717     }
3718     ierr = PetscFree(bindx);CHKERRQ(ierr);
3719 
3720     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3721     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3722     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3723     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3724     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3725     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3726     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3727     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3728   }
3729 
3730   /* numeric phase */
3731   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3732   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3733 
3734   for (i=0; i<m; i++) {
3735     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3736     Ii   = i + rstart;
3737     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3738     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3739   }
3740   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3741   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3742   PetscFunctionReturn(0);
3743 }
3744