xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 4e8afd12df985612b40e26aef947da2f566aee4e)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode ierr;
15   PetscInt       i,*idxb = 0;
16   PetscScalar    *va,*vb;
17   Vec            vtmp;
18 
19   PetscFunctionBegin;
20   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
21   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
22   if (idx) {
23     for (i=0; i<A->rmap->n; i++) {
24       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
25     }
26   }
27 
28   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
29   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
30   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
31   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
32 
33   for (i=0; i<A->rmap->n; i++) {
34     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
35       va[i] = vb[i];
36       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
37     }
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non scalable way since the
73    length of colmap equals the global matrix length.
74 */
75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
76 {
77   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
78   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
79   PetscErrorCode ierr;
80   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
81 
82   PetscFunctionBegin;
83 #if defined(PETSC_USE_CTABLE)
84   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
85   for (i=0; i<nbs; i++) {
86     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
87   }
88 #else
89   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
90   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
91   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
92 #endif
93   PetscFunctionReturn(0);
94 }
95 
/*
  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)

  Insert or add a single point entry into the "diagonal" block A.
  row,col  - local point indices into A; converted here to block indices
             (brow,bcol) and intra-block offsets (ridx,cidx)
  value    - scalar to insert (INSERT_VALUES) or add (ADD_VALUES)
  orow,ocol- original global indices, used only in error messages

  Uses a binary search narrowed to <=3 entries followed by a linear scan of
  the block row.  If the block (brow,bcol) is absent: with nonew==1 the entry
  is silently dropped, with nonew==-1 it is an error, otherwise the row is
  (re)allocated and later blocks are shifted up to make room, with the new
  block zeroed before the value is stored.

  Relies on the caller declaring brow,bcol,ridx,cidx,low,high,t,_i,N,nrow,
  rmax,rp,ap,bap and the A-block arrays aa,ai,aj,aimax,ailen (see
  MatSetValues_MPIBAIJ).
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
131 
/*
  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)

  Identical in structure to MatSetValues_SeqBAIJ_A_Private, but operates on
  the "off-diagonal" block B (arrays ba,bi,bj,bimax,bilen and nonew flag b->nonew).
  row,col are local point indices into B; orow,ocol are the original global
  indices used only in error messages.  See the A-variant above for the
  search/insert/reallocate logic.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
167 
/*
   MatSetValues_MPIBAIJ - Inserts or adds point-indexed values into an MPIBAIJ matrix.

   m,im[]  - number of rows and their global (point) row indices; negative rows skipped
   n,in[]  - number of columns and their global (point) column indices
   v       - values, row-oriented or column-oriented per baij->roworiented
   addv    - INSERT_VALUES or ADD_VALUES

   Locally owned rows are routed to the diagonal block A (columns in the
   local column range) or the off-diagonal block B (all other columns) via the
   MatSetValues_SeqBAIJ_{A,B}_Private macros; off-process rows are stashed for
   communication at assembly time.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row index means "skip" */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column in local range -> diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* off-diagonal column -> block B; translate to B's compressed numbering */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* block column not yet present in B: fall back to unassembled state */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;  /* add intra-block offset to B's local point column */
          } else col = in[j];      /* before first assembly B uses global column indices */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
254 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - Inserts or adds one bs x bs block of
   values at (block) position (row,col) of a SeqBAIJ matrix.

   v        - the bs*bs values; interpreted row- or column-oriented per a->roworiented
              (the matrix itself stores blocks column-oriented, hence the transposing
              jj loops in the roworiented case)
   is       - INSERT_VALUES or ADD_VALUES
   orow,ocol- original global block indices, used only in error messages

   Searches the block row (binary search narrowed to <=7, then linear scan);
   if absent and insertion is allowed, reallocates and shifts later blocks up.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];       /* block-column indices of this block row */
  ap   = aa + bs2*ai[row];   /* values of this block row */
  rmax = imax[row];
  nrow = ailen[row];
  value = v;                 /* NOTE(review): redundant, value already initialized to v above */
  low = 0;
  high = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: add/overwrite its values */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* input is row-oriented; stored blocks are column-oriented, so transpose */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* input already column-oriented: straight copy/accumulate */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
          }
          bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;  /* silently ignore new blocks */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
343 
/*
    MatSetValuesBlocked_MPIBAIJ - Inserts or adds block-indexed values into an
    MPIBAIJ matrix.  im[]/in[] are global BLOCK indices; v holds m*n blocks of
    bs*bs values each.

    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the persistent one-block staging buffer */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block row means "skip" */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          /* gather the strided bs x bs block into the contiguous staging buffer */
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;  /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in local range -> diagonal block A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* off-diagonal block column -> block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;  /* colmap stores 1-based point columns; convert to block */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* block column not yet present in B: fall back to unassembled state */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];  /* before first assembly B uses global block indices */
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
444 
445 #define HASH_KEY 0.6180339887
446 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
447 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
448 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - Point-indexed MatSetValues using the precomputed
   hash table (baij->ht/baij->hd) built by MatCreateHashTable_MPIBAIJ_Private().
   The nonzero pattern is fixed: an entry whose block is not in the table is an
   error.  Off-process rows are stashed as usual.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      /* locally owned point row */
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up the containing block in the hash table; keys are 1-based
           (global block row)*Nbs + (global block col) + 1 */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe from h1 to end, then wrap to the front */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address
           (blocks are stored column-oriented) */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      /* off-process row: stash for assembly-time communication */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
519 
520 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
521 {
522   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
523   PetscBool         roworiented = baij->roworiented;
524   PetscErrorCode    ierr;
525   PetscInt          i,j,ii,jj,row,col;
526   PetscInt          rstart=baij->rstartbs;
527   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
528   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
529   PetscReal         tmp;
530   MatScalar         **HD = baij->hd,*baij_a;
531   const PetscScalar *v_t,*value;
532 #if defined(PETSC_USE_DEBUG)
533   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
534 #endif
535 
536   PetscFunctionBegin;
537   if (roworiented) stepval = (n-1)*bs;
538   else stepval = (m-1)*bs;
539 
540   for (i=0; i<m; i++) {
541 #if defined(PETSC_USE_DEBUG)
542     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
543     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
544 #endif
545     row = im[i];
546     v_t = v + i*nbs2;
547     if (row >= rstart && row < rend) {
548       for (j=0; j<n; j++) {
549         col = in[j];
550 
551         /* Look up into the Hash Table */
552         key = row*Nbs+col+1;
553         h1  = HASH(size,key,tmp);
554 
555         idx = h1;
556 #if defined(PETSC_USE_DEBUG)
557         total_ct++;
558         insert_ct++;
559         if (HT[idx] != key) {
560           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
561           if (idx == size) {
562             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
563             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
564           }
565         }
566 #else
567         if (HT[idx] != key) {
568           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
569           if (idx == size) {
570             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
571             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
572           }
573         }
574 #endif
575         baij_a = HD[idx];
576         if (roworiented) {
577           /*value = v + i*(stepval+bs)*bs + j*bs;*/
578           /* value = v + (i*(stepval+bs)+j)*bs; */
579           value = v_t;
580           v_t  += bs;
581           if (addv == ADD_VALUES) {
582             for (ii=0; ii<bs; ii++,value+=stepval) {
583               for (jj=ii; jj<bs2; jj+=bs) {
584                 baij_a[jj] += *value++;
585               }
586             }
587           } else {
588             for (ii=0; ii<bs; ii++,value+=stepval) {
589               for (jj=ii; jj<bs2; jj+=bs) {
590                 baij_a[jj] = *value++;
591               }
592             }
593           }
594         } else {
595           value = v + j*(stepval+bs)*bs + i*bs;
596           if (addv == ADD_VALUES) {
597             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
598               for (jj=0; jj<bs; jj++) {
599                 baij_a[jj] += *value++;
600               }
601             }
602           } else {
603             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
604               for (jj=0; jj<bs; jj++) {
605                 baij_a[jj] = *value++;
606               }
607             }
608           }
609         }
610       }
611     } else {
612       if (!baij->donotstash) {
613         if (roworiented) {
614           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
615         } else {
616           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
617         }
618       }
619     }
620   }
621 #if defined(PETSC_USE_DEBUG)
622   baij->ht_total_ct  += total_ct;
623   baij->ht_insert_ct += insert_ct;
624 #endif
625   PetscFunctionReturn(0);
626 }
627 
/*
   MatGetValues_MPIBAIJ - Retrieves point-indexed values from the locally owned
   rows of an MPIBAIJ matrix into v (row-major, v[i*n+j]).  Only local rows are
   supported; negative indices are skipped.  Off-diagonal columns not present
   in B yield 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column in local range -> read from diagonal block A */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column -> translate via colmap and read from B */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;  /* colmap values are 1-based; 0 (now -1) means "not present" */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column absent from B (or stale colmap entry): the value is zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
668 
669 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
670 {
671   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
672   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
673   PetscErrorCode ierr;
674   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
675   PetscReal      sum = 0.0;
676   MatScalar      *v;
677 
678   PetscFunctionBegin;
679   if (baij->size == 1) {
680     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
681   } else {
682     if (type == NORM_FROBENIUS) {
683       v  = amat->a;
684       nz = amat->nz*bs2;
685       for (i=0; i<nz; i++) {
686         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
687       }
688       v  = bmat->a;
689       nz = bmat->nz*bs2;
690       for (i=0; i<nz; i++) {
691         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
692       }
693       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
694       *nrm = PetscSqrtReal(*nrm);
695     } else if (type == NORM_1) { /* max column sum */
696       PetscReal *tmp,*tmp2;
697       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
698       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
699       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
700       v    = amat->a; jj = amat->j;
701       for (i=0; i<amat->nz; i++) {
702         for (j=0; j<bs; j++) {
703           col = bs*(cstart + *jj) + j; /* column index */
704           for (row=0; row<bs; row++) {
705             tmp[col] += PetscAbsScalar(*v);  v++;
706           }
707         }
708         jj++;
709       }
710       v = bmat->a; jj = bmat->j;
711       for (i=0; i<bmat->nz; i++) {
712         for (j=0; j<bs; j++) {
713           col = bs*garray[*jj] + j;
714           for (row=0; row<bs; row++) {
715             tmp[col] += PetscAbsScalar(*v); v++;
716           }
717         }
718         jj++;
719       }
720       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
721       *nrm = 0.0;
722       for (j=0; j<mat->cmap->N; j++) {
723         if (tmp2[j] > *nrm) *nrm = tmp2[j];
724       }
725       ierr = PetscFree(tmp);CHKERRQ(ierr);
726       ierr = PetscFree(tmp2);CHKERRQ(ierr);
727     } else if (type == NORM_INFINITY) { /* max row sum */
728       PetscReal *sums;
729       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
730       sum  = 0.0;
731       for (j=0; j<amat->mbs; j++) {
732         for (row=0; row<bs; row++) sums[row] = 0.0;
733         v  = amat->a + bs2*amat->i[j];
734         nz = amat->i[j+1]-amat->i[j];
735         for (i=0; i<nz; i++) {
736           for (col=0; col<bs; col++) {
737             for (row=0; row<bs; row++) {
738               sums[row] += PetscAbsScalar(*v); v++;
739             }
740           }
741         }
742         v  = bmat->a + bs2*bmat->i[j];
743         nz = bmat->i[j+1]-bmat->i[j];
744         for (i=0; i<nz; i++) {
745           for (col=0; col<bs; col++) {
746             for (row=0; row<bs; row++) {
747               sums[row] += PetscAbsScalar(*v); v++;
748             }
749           }
750         }
751         for (row=0; row<bs; row++) {
752           if (sums[row] > sum) sum = sums[row];
753         }
754       }
755       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
756       ierr = PetscFree(sums);CHKERRQ(ierr);
757     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
758   }
759   PetscFunctionReturn(0);
760 }
761 
/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0; /* ct: total collisions; max: longest probe sequence observed */
#endif

  PetscFunctionBegin;
  /* table is built only once; nothing to do if it already exists */
  if (baij->ht) PetscFunctionReturn(0);

  /* size the table as factor times the number of nonzero blocks */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;  /* HD[slot]: pointer to the bs2 values of the block stored in that slot */
  HT   = baij->ht;  /* HT[slot]: key for that slot; 0 (from PetscCalloc2) marks an empty slot */

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      /* +1 so a valid key can never be 0, which is the empty-slot marker */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* linear probing: scan forward (with wraparound) for the first empty slot */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];  /* map local off-diagonal block column to its global block column */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;  /* j = number of occupied slots */
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
853 
854 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
855 {
856   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
857   PetscErrorCode ierr;
858   PetscInt       nstash,reallocs;
859 
860   PetscFunctionBegin;
861   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
862 
863   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
864   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
865   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
866   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
867   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
868   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: receive the messages started in MatAssemblyBegin */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;  /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 values, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    /* restore the orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the scatter used by the mat-vec product */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* optionally switch to hash-table based insertion for subsequent MatSetValues */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release workspace that MatGetRow may have allocated */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
985 
986 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
987 #include <petscdraw.h>
/* Viewer backend for ASCII, draw, and socket viewers: for the data formats the
   whole matrix is gathered onto rank 0 and viewed there as a SeqBAIJ matrix. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    /* the info formats print per-rank statistics only and return early */
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 holds all rows; the other ranks contribute empty local blocks */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block per call (values are column-major) */
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* on rank 0 the local diagonal block of the gathered matrix is the whole matrix */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1105 
1106 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1107 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1108 {
1109   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
1110   Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
1111   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
1112   const PetscInt *garray = aij->garray;
1113   PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1114   PetscInt       *rowlens,*colidxs;
1115   PetscScalar    *matvals;
1116   PetscErrorCode ierr;
1117 
1118   PetscFunctionBegin;
1119   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1120 
1121   M  = mat->rmap->N;
1122   N  = mat->cmap->N;
1123   m  = mat->rmap->n;
1124   rs = mat->rmap->rstart;
1125   cs = mat->cmap->rstart;
1126   bs = mat->rmap->bs;
1127   nz = bs*bs*(A->nz + B->nz);
1128 
1129   /* write matrix header */
1130   header[0] = MAT_FILE_CLASSID;
1131   header[1] = M; header[2] = N; header[3] = nz;
1132   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1133   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1134 
1135   /* fill in and store row lengths */
1136   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1137   for (cnt=0, i=0; i<A->mbs; i++)
1138     for (j=0; j<bs; j++)
1139       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1140   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1141   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1142 
1143   /* fill in and store column indices */
1144   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1145   for (cnt=0, i=0; i<A->mbs; i++) {
1146     for (k=0; k<bs; k++) {
1147       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1148         if (garray[B->j[jb]] > cs/bs) break;
1149         for (l=0; l<bs; l++)
1150           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1151       }
1152       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1153         for (l=0; l<bs; l++)
1154           colidxs[cnt++] = bs*A->j[ja] + l + cs;
1155       for (; jb<B->i[i+1]; jb++)
1156         for (l=0; l<bs; l++)
1157           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1158     }
1159   }
1160   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1161   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
1162   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1163 
1164   /* fill in and store nonzero values */
1165   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1166   for (cnt=0, i=0; i<A->mbs; i++) {
1167     for (k=0; k<bs; k++) {
1168       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1169         if (garray[B->j[jb]] > cs/bs) break;
1170         for (l=0; l<bs; l++)
1171           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1172       }
1173       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1174         for (l=0; l<bs; l++)
1175           matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1176       for (; jb<B->i[i+1]; jb++)
1177         for (l=0; l<bs; l++)
1178           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];;
1179     }
1180   }
1181   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
1182   ierr = PetscFree(matvals);CHKERRQ(ierr);
1183 
1184   /* write block size option to the viewer's .info file */
1185   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1190 {
1191   PetscErrorCode ierr;
1192   PetscBool      iascii,isdraw,issocket,isbinary;
1193 
1194   PetscFunctionBegin;
1195   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1196   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1197   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1198   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1199   if (iascii || isdraw || issocket) {
1200     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1201   } else if (isbinary) {
1202     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
/* Releases all storage owned by the MPIBAIJ implementation and removes the
   composed methods so the Mat can be retyped or freed cleanly. */
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* destroy the diagonal and off-diagonal sequential blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* the local-to-global column map is a PetscTable or a plain array depending on build options */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* communication objects for the matrix-vector product */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  /* MatGetRow workspace, block insertion buffer, and hash table (if built) */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove all type-specific methods composed on this object */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1249 
1250 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1251 {
1252   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1253   PetscErrorCode ierr;
1254   PetscInt       nt;
1255 
1256   PetscFunctionBegin;
1257   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1258   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1259   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1260   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1261   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1262   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1263   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1269 {
1270   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1275   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1276   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1282 {
1283   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1284   PetscErrorCode ierr;
1285 
1286   PetscFunctionBegin;
1287   /* do nondiagonal part */
1288   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1289   /* do local part */
1290   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1291   /* add partial results together */
1292   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1293   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1298 {
1299   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1300   PetscErrorCode ierr;
1301 
1302   PetscFunctionBegin;
1303   /* do nondiagonal part */
1304   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1305   /* do local part */
1306   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1307   /* add partial results together */
1308   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1309   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 /*
1314   This only works correctly for square matrices where the subblock A->A is the
1315    diagonal block
1316 */
1317 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1318 {
1319   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1324   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1329 {
1330   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1335   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
/* Returns one locally owned (global point) row of the matrix, merging the
   diagonal (A) and off-diagonal (B) parts into globally sorted column order.
   Must be paired with MatRestoreRow_MPIBAIJ. */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  /* only one row may be "gotten" at a time; enforced via this flag */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];  /* nonzero blocks in block row i */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;  /* convert to a local point-row index */

  /* request only the pieces the caller asked for from the sequential blocks */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;  /* local off-diagonal block column -> global block column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal range */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* layout: B entries left of diagonal | all A entries | remaining B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the value pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested, so find imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      /* empty row */
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1415 
1416 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1417 {
1418   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1419 
1420   PetscFunctionBegin;
1421   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1422   baij->getrowactive = PETSC_FALSE;
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1427 {
1428   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1429   PetscErrorCode ierr;
1430 
1431   PetscFunctionBegin;
1432   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1433   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1438 {
1439   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1440   Mat            A  = a->A,B = a->B;
1441   PetscErrorCode ierr;
1442   PetscLogDouble isend[5],irecv[5];
1443 
1444   PetscFunctionBegin;
1445   info->block_size = (PetscReal)matin->rmap->bs;
1446 
1447   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1448 
1449   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1450   isend[3] = info->memory;  isend[4] = info->mallocs;
1451 
1452   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1453 
1454   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1455   isend[3] += info->memory;  isend[4] += info->mallocs;
1456 
1457   if (flag == MAT_LOCAL) {
1458     info->nz_used      = isend[0];
1459     info->nz_allocated = isend[1];
1460     info->nz_unneeded  = isend[2];
1461     info->memory       = isend[3];
1462     info->mallocs      = isend[4];
1463   } else if (flag == MAT_GLOBAL_MAX) {
1464     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1465 
1466     info->nz_used      = irecv[0];
1467     info->nz_allocated = irecv[1];
1468     info->nz_unneeded  = irecv[2];
1469     info->memory       = irecv[3];
1470     info->mallocs      = irecv[4];
1471   } else if (flag == MAT_GLOBAL_SUM) {
1472     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1473 
1474     info->nz_used      = irecv[0];
1475     info->nz_allocated = irecv[1];
1476     info->nz_unneeded  = irecv[2];
1477     info->memory       = irecv[3];
1478     info->mallocs      = irecv[4];
1479   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1480   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1481   info->fill_ratio_needed = 0;
1482   info->factor_mallocs    = 0;
1483   PetscFunctionReturn(0);
1484 }
1485 
/* Applies a MatOption to this matrix, forwarding it to the sequential
   diagonal/off-diagonal blocks where appropriate. */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options that apply to the nonzero structure go to both blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* remember locally and forward, since insertion goes through both blocks */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;  /* default hash-table oversize factor */
    break;
  /* symmetry-type options are only meaningful for the diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1533 
/*
   Form the transpose of a parallel BAIJ matrix by inserting each local block
   column-by-column into the (swapped-size) result matrix B, then assembling.
   Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, and MAT_INPLACE_MATRIX.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create B with row/column sizes swapped relative to A */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    /* rvals = the bs global (point) row indices covered by block row i of A;
       they become column indices of B */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* global point column of the first column in this block of A */
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert one column of the bs x bs block as a row of B; `a` advances by
           bs per column (NOTE(review): relies on SeqBAIJ block storage order —
           bs consecutive values per block column; confirm against Mat_SeqBAIJ) */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* off-diagonal blocks use garray to map compressed local block columns
         back to global block columns */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* for MAT_INPLACE_MATRIX, fold the new matrix back into A's header */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1597 
/*
   Compute mat = diag(ll) * mat * diag(rr).  ll must conform to the local row
   layout and rr to the local column layout; either may be NULL to skip that
   side.  The statement order deliberately overlaps the scatter of rr with the
   left scaling and diagonal-block work.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;  /* diagonal and off-diagonal local blocks */
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter of rr is in flight */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1628 
/*
   Zero the globally-numbered rows listed in rows[], optionally placing diag on
   the diagonal.  When both x and b are given, also set b[row] = diag*x[row]
   for each zeroed, locally owned row.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
  PetscInt      *lrows;   /* local indices of the rows owned by this rank */
  PetscInt       r, len;
  PetscBool      cong;    /* PETSC_TRUE when row and column layouts are congruent */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    /* fast path: sequential routine can insert diag directly on existing diagonal */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert diagonal entries via
       MatSetValues, which requires new nonzero locations to be allowed */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* re-assemble because MatSetValues may have created new nonzeros */
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1687 
/*
   Zero the listed global rows AND the corresponding columns, optionally placing
   diag on the diagonal.  Row ownership is resolved with a PetscSF; column
   zeroing in the off-diagonal block uses a scattered 0/1 mask vector.  When x
   is given, b is updated so that the eliminated unknowns keep their values.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;   /* -1 marks "not requested" */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;   /* mark eliminated columns */
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  /* scatter the mask so each rank knows which ghost columns are eliminated */
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* NOTE(review): assumes b is non-NULL whenever x is — confirm with callers */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;   /* point columns in this block row */
    /* aa points at the row's first entry within the block storage; stepping by
       bs stays on the same point row across block columns */
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          /* move the known value to the right-hand side before zeroing */
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1783 
1784 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1785 {
1786   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1787   PetscErrorCode ierr;
1788 
1789   PetscFunctionBegin;
1790   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1795 
1796 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1797 {
1798   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1799   Mat            a,b,c,d;
1800   PetscBool      flg;
1801   PetscErrorCode ierr;
1802 
1803   PetscFunctionBegin;
1804   a = matA->A; b = matA->B;
1805   c = matB->A; d = matB->B;
1806 
1807   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1808   if (flg) {
1809     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1810   }
1811   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1816 {
1817   PetscErrorCode ierr;
1818   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1819   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1820 
1821   PetscFunctionBegin;
1822   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1823   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1824     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1825   } else {
1826     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1827     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1828   }
1829   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1834 {
1835   PetscErrorCode ierr;
1836 
1837   PetscFunctionBegin;
1838   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1843 {
1844   PetscErrorCode ierr;
1845   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1846   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1847   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1848 
1849   PetscFunctionBegin;
1850   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1851   PetscFunctionReturn(0);
1852 }
1853 
/*
   Compute Y = a*X + Y.  With SAME_NONZERO_PATTERN the values arrays of the two
   local blocks are combined directly with BLAS axpy; with SUBSET the generic
   routine is used; otherwise a new matrix with the merged pattern is built and
   swapped into Y's header.
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;  /* scalars per block */

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: axpy the raw value arrays of both local blocks */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* different patterns: preallocate a fresh matrix for the union pattern,
       compute the sum there, and replace Y's innards via header replace */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1897 
1898 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1899 {
1900   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1901   PetscErrorCode ierr;
1902 
1903   PetscFunctionBegin;
1904   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1905   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1910 {
1911   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1912   PetscErrorCode ierr;
1913 
1914   PetscFunctionBegin;
1915   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1916   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1921 {
1922   PetscErrorCode ierr;
1923   IS             iscol_local;
1924   PetscInt       csize;
1925 
1926   PetscFunctionBegin;
1927   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1928   if (call == MAT_REUSE_MATRIX) {
1929     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1930     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1931   } else {
1932     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1933   }
1934   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1935   if (call == MAT_INITIAL_MATRIX) {
1936     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1937     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 /*
1943   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1944   in local and then by concatenating the local matrices the end result.
1945   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1946   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1947 */
/*
   Worker for submatrix extraction: gathers the selected rows/columns into a
   per-rank sequential BAIJ matrix (Mreuse), then assembles the parallel result
   from it.  csize is the requested local column size of the result (in points,
   PETSC_DECIDE allowed).  Used by both BAIJ and SBAIJ.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* point indices -> block indices for the block-oriented extraction below */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* the sequential scratch matrix was stashed on *newmat by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* convert to block counts */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible across ranks */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    /* prefix sum gives this rank's [rstart, rend) block-column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    /* both preallocations are called since this routine serves BAIJ and SBAIJ;
       only the one matching M's type takes effect */
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    /* NOTE(review): ml is in points while m was divided by bs above — confirm
       this size check is intended to compare these units */
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  /* insert each block row of the sequential scratch matrix into M */
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2072 
2073 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2074 {
2075   MPI_Comm       comm,pcomm;
2076   PetscInt       clocal_size,nrows;
2077   const PetscInt *rows;
2078   PetscMPIInt    size;
2079   IS             crowp,lcolp;
2080   PetscErrorCode ierr;
2081 
2082   PetscFunctionBegin;
2083   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2084   /* make a collective version of 'rowp' */
2085   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2086   if (pcomm==comm) {
2087     crowp = rowp;
2088   } else {
2089     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2090     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2091     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2092     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2093   }
2094   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2095   /* make a local version of 'colp' */
2096   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2097   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2098   if (size==1) {
2099     lcolp = colp;
2100   } else {
2101     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2102   }
2103   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2104   /* now we just get the submatrix */
2105   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2106   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2107   /* clean up */
2108   if (pcomm!=comm) {
2109     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2110   }
2111   if (size>1) {
2112     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2113   }
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2118 {
2119   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2120   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2121 
2122   PetscFunctionBegin;
2123   if (nghosts) *nghosts = B->nbs;
2124   if (ghosts) *ghosts = baij->garray;
2125   PetscFunctionReturn(0);
2126 }
2127 
/*
   Build, on every rank, a sequential AIJ matrix holding the full (block-level)
   nonzero structure of the parallel matrix A.  Row lengths and then column
   indices are gathered with MPI_Allgatherv; numerical values are not set.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[i] = number of nonzero blocks in global block row i (diag + offdiag) */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  /* one allocation serves both recvcounts and displs */
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  /* write this rank's indices directly into B's j array, in sorted order:
     off-diagonal columns below the diagonal range, then diagonal, then the rest */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  /* recompute recvcounts/displs in units of nonzeros (previously block rows) */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information from A */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2246 
/*
   Apply (local) SOR relaxation.  Only the "local" sweep variants are
   supported: each outer iteration scatters the current solution to the ghost
   vector, forms bb1 = bb - B*x with the off-diagonal block, and then performs
   the requested sweep on the diagonal block.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;   /* work vector; only allocated when needed */

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* note precedence: (~flag) & SOR_ZERO_INITIAL_GUESS, i.e. the work vector is
     needed whenever the zero-initial-guess bit is NOT set, or its > 1 */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration can work on bb directly since x starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  /* VecDestroy is a no-op when bb1 was never created */
  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2317 
2318 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2319 {
2320   PetscErrorCode ierr;
2321   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2322   PetscInt       N,i,*garray = aij->garray;
2323   PetscInt       ib,jb,bs = A->rmap->bs;
2324   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2325   MatScalar      *a_val = a_aij->a;
2326   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2327   MatScalar      *b_val = b_aij->a;
2328   PetscReal      *work;
2329 
2330   PetscFunctionBegin;
2331   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2332   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2333   if (type == NORM_2) {
2334     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2335       for (jb=0; jb<bs; jb++) {
2336         for (ib=0; ib<bs; ib++) {
2337           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2338           a_val++;
2339         }
2340       }
2341     }
2342     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2343       for (jb=0; jb<bs; jb++) {
2344         for (ib=0; ib<bs; ib++) {
2345           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2346           b_val++;
2347         }
2348       }
2349     }
2350   } else if (type == NORM_1) {
2351     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2352       for (jb=0; jb<bs; jb++) {
2353         for (ib=0; ib<bs; ib++) {
2354           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2355           a_val++;
2356         }
2357       }
2358     }
2359     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2360       for (jb=0; jb<bs; jb++) {
2361        for (ib=0; ib<bs; ib++) {
2362           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2363           b_val++;
2364         }
2365       }
2366     }
2367   } else if (type == NORM_INFINITY) {
2368     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2369       for (jb=0; jb<bs; jb++) {
2370         for (ib=0; ib<bs; ib++) {
2371           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2372           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2373           a_val++;
2374         }
2375       }
2376     }
2377     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2378       for (jb=0; jb<bs; jb++) {
2379         for (ib=0; ib<bs; ib++) {
2380           int col = garray[b_aij->j[i]] * bs + jb;
2381           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2382           b_val++;
2383         }
2384       }
2385     }
2386   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2387   if (type == NORM_INFINITY) {
2388     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2389   } else {
2390     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2391   }
2392   ierr = PetscFree(work);CHKERRQ(ierr);
2393   if (type == NORM_2) {
2394     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2395   }
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2400 {
2401   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2402   PetscErrorCode ierr;
2403 
2404   PetscFunctionBegin;
2405   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2406   A->factorerrortype             = a->A->factorerrortype;
2407   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2408   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2413 {
2414   PetscErrorCode ierr;
2415   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2416   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2417 
2418   PetscFunctionBegin;
2419   if (!Y->preallocated) {
2420     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2421   } else if (!aij->nz) {
2422     PetscInt nonew = aij->nonew;
2423     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2424     aij->nonew = nonew;
2425   }
2426   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2431 {
2432   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2433   PetscErrorCode ierr;
2434 
2435   PetscFunctionBegin;
2436   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2437   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2438   if (d) {
2439     PetscInt rstart;
2440     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2441     *d += rstart/A->rmap->bs;
2442 
2443   }
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2448 {
2449   PetscFunctionBegin;
2450   *a = ((Mat_MPIBAIJ*)A->data)->A;
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 /* -------------------------------------------------------------------*/
/* Operation table for the MATMPIBAIJ type.  The /*NN*\/ markers index the slots
   of struct _MatOps; a 0 entry means the operation is not implemented for this
   type.  The positional layout must match the struct declaration exactly, so
   entries may only be replaced in place, never reordered. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ 0,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       0,
                                       MatZeroRowsColumns_MPIBAIJ,
                                /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       0,
                                       0,
                                /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*74*/ 0,
                                       MatFDColoringApply_BAIJ,
                                       0,
                                       0,
                                       0,
                                /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPIBAIJ,
                                /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*104*/0,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPIBAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
                                       0,
                                       MatGetGhosts_MPIBAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                /*124*/0,
                                       MatGetColumnNorms_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_Default,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
};
2601 
2602 
2603 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2604 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2605 
/*
   MatMPIBAIJSetPreallocationCSR_MPIBAIJ - preallocates B from a local block-CSR
   description (ii/jj in block units) and, when V is given, inserts the values.

   Pass 1 counts, per local block row, how many block columns fall inside
   [cstart,cend) (diagonal part) versus outside (off-diagonal part) to drive the
   preallocation.  Pass 2 inserts the values with MatSetValuesBlocked; when the
   values are row oriented and bs > 1 the bs*bs blocks must be inserted one at a
   time because their internal layout does not match a whole-row insert.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all of the following are in block (not point) units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    /* split this row's block columns into diagonal vs off-diagonal counts */
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert explicit zeros to fix the nonzero structure */
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* CSR input is purely local, so off-process stash communication can be skipped */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2677 
2678 /*@C
2679    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2680 
2681    Collective
2682 
2683    Input Parameters:
2684 +  B - the matrix
2685 .  bs - the block size
2686 .  i - the indices into j for the start of each local row (starts with zero)
2687 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2688 -  v - optional values in the matrix
2689 
2690    Level: advanced
2691 
2692    Notes:
2693     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2694    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2695    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2696    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2697    block column and the second index is over columns within a block.
2698 
2699    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2700 
2701 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2702 @*/
/* Public entry point: validates the arguments collectively and dispatches to the
   type-specific implementation composed under "MatMPIBAIJSetPreallocationCSR_C"
   (a no-op for types that do not provide one, per PetscTryMethod semantics). */
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2714 
2715 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2716 {
2717   Mat_MPIBAIJ    *b;
2718   PetscErrorCode ierr;
2719   PetscInt       i;
2720   PetscMPIInt    size;
2721 
2722   PetscFunctionBegin;
2723   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2724   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2725   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2726   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2727 
2728   if (d_nnz) {
2729     for (i=0; i<B->rmap->n/bs; i++) {
2730       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2731     }
2732   }
2733   if (o_nnz) {
2734     for (i=0; i<B->rmap->n/bs; i++) {
2735       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2736     }
2737   }
2738 
2739   b      = (Mat_MPIBAIJ*)B->data;
2740   b->bs2 = bs*bs;
2741   b->mbs = B->rmap->n/bs;
2742   b->nbs = B->cmap->n/bs;
2743   b->Mbs = B->rmap->N/bs;
2744   b->Nbs = B->cmap->N/bs;
2745 
2746   for (i=0; i<=b->size; i++) {
2747     b->rangebs[i] = B->rmap->range[i]/bs;
2748   }
2749   b->rstartbs = B->rmap->rstart/bs;
2750   b->rendbs   = B->rmap->rend/bs;
2751   b->cstartbs = B->cmap->rstart/bs;
2752   b->cendbs   = B->cmap->rend/bs;
2753 
2754 #if defined(PETSC_USE_CTABLE)
2755   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2756 #else
2757   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2758 #endif
2759   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2760   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2761   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2762 
2763   /* Because the B will have been resized we simply destroy it and create a new one each time */
2764   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2765   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2766   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2767   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2768   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2769   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2770 
2771   if (!B->preallocated) {
2772     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2773     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2774     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2775     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2776     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2777   }
2778 
2779   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2780   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2781   B->preallocated  = PETSC_TRUE;
2782   B->was_assembled = PETSC_FALSE;
2783   B->assembled     = PETSC_FALSE;
2784   PetscFunctionReturn(0);
2785 }
2786 
2787 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2788 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2789 
/*
   MatConvert_MPIBAIJ_MPIAdj - builds an MPIAdj (block adjacency) graph from the
   block nonzero structure of B, dropping diagonal entries.

   For each local block row, the off-diagonal (B) columns that precede the local
   range are emitted first, then the diagonal (A) columns shifted by rstart (with
   the diagonal itself skipped), then the remaining off-diagonal columns — this
   produces globally sorted column indices assuming each part is sorted.
   NOTE(review): the split condition is `garray[jo[j]] > rstart`; off-process
   block columns presumably never equal a value in the local range, so > vs >=
   should be equivalent — confirm for rectangular layouts.
   Ownership of ii and jj passes to the MPIAdj matrix (no copy).
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    /* off-diagonal columns that come before the local diagonal range */
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* diagonal-part columns in global numbering, skipping the diagonal entry */
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    /* remaining off-diagonal columns (after the local range) */
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2829 
2830 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2831 
2832 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2833 
/*
   MatConvert_MPIBAIJ_MPIAIJ - converts an assembled MPIBAIJ matrix to MPIAIJ by
   converting its sequential A and B parts with MatConvert_SeqBAIJ_SeqAIJ.

   For MAT_INITIAL_MATRIX/MAT_INPLACE_MATRIX the input A is temporarily
   disassembled (MatDisAssemble_MPIBAIJ) so its B part uses global column
   numbering, and is reassembled afterwards — i.e. this path mutates and then
   restores the *input* matrix, so the statement order here is load-bearing.
   For MAT_INPLACE_MATRIX the new matrix replaces A via MatHeaderReplace.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    /* replace B's sub-matrices with fresh conversions of A's parts; A is
       disassembled first so a->B carries global column indices */
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    /* restore the input matrix to its assembled state */
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2878 
2879 /*MC
2880    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2881 
2882    Options Database Keys:
2883 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2884 . -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table while constructing the matrix; <fact> is passed to MatMPIBAIJSetHashTableFactor()
2886 
2887    Level: beginner
2888 
2889    Notes:
    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2892 
2893 .seealso: MatCreateBAIJ
2894 M*/
2895 
2896 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2897 
/*
   MatCreate_MPIBAIJ - type constructor for MATMPIBAIJ.

   Installs the operation table, initializes all per-type state to empty (the
   sequential A/B parts are created later, during preallocation), composes the
   conversion/preallocation methods, and processes the -mat_use_hash_table
   option.  PetscOptionsBegin/End are block-opening macros, so the option code
   must stay between them exactly as written.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;


  /* compose type-specific methods so generic MatConvert()/MatXXXSetYYY() entry
     points can dispatch to the MPIBAIJ implementations */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    /* the same option name carries an optional value that becomes the factor */
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;  /* guard against nonsensical (<=1) factors */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2979 
2980 /*MC
2981    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2982 
2983    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2984    and MATMPIBAIJ otherwise.
2985 
2986    Options Database Keys:
2987 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2988 
2989   Level: beginner
2990 
2991 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2992 M*/
2993 
2994 /*@C
2995    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2996    (block compressed row).  For good matrix assembly performance
2997    the user should preallocate the matrix storage by setting the parameters
2998    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2999    performance can be increased by more than a factor of 50.
3000 
3001    Collective on Mat
3002 
3003    Input Parameters:
3004 +  B - the matrix
3005 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3006           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3007 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3008            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
3013 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3014            submatrix (same for all local rows).
3015 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3016            off-diagonal portion of the local submatrix (possibly different for
3017            each block row) or NULL.
3018 
3019    If the *_nnz parameter is given then the *_nz parameter is ignored
3020 
3021    Options Database Keys:
3022 +   -mat_block_size - size of the blocks to use
3023 -   -mat_use_hash_table <fact>
3024 
3025    Notes:
3026    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3028 
3029    Storage Information:
3030    For a square global matrix we define each processor's diagonal portion
3031    to be its local rows and the corresponding columns (a square submatrix);
3032    each processor's off-diagonal portion encompasses the remainder of the
3033    local matrix (a rectangular submatrix).
3034 
3035    The user can specify preallocated storage for the diagonal part of
3036    the local submatrix with either d_nz or d_nnz (not both).  Set
3037    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3038    memory allocation.  Likewise, specify preallocated storage for the
3039    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3040 
3041    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3042    the figure below we depict these three local rows and all columns (0-11).
3043 
3044 .vb
3045            0 1 2 3 4 5 6 7 8 9 10 11
3046           --------------------------
3047    row 3  |o o o d d d o o o o  o  o
3048    row 4  |o o o d d d o o o o  o  o
3049    row 5  |o o o d d d o o o o  o  o
3050           --------------------------
3051 .ve
3052 
3053    Thus, any entries in the d locations are stored in the d (diagonal)
3054    submatrix, and any entries in the o locations are stored in the
3055    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3056    stored simply in the MATSEQBAIJ format for compressed row storage.
3057 
3058    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3059    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3060    In general, for PDE problems in which most nonzeros are near the diagonal,
3061    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3062    or you will get TERRIBLE performance; see the users' manual chapter on
3063    matrices.
3064 
3065    You can call MatGetInfo() to get information on how effective the preallocation was;
3066    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3067    You can also run with the option -info and look for messages with the string
3068    malloc in them to see if additional memory allocation was needed.
3069 
3070    Level: intermediate
3071 
3072 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3073 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate the Mat header and that bs is collectively consistent before dispatching */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the implementation composed on B ("MatMPIBAIJSetPreallocation_C");
     PetscTryMethod is a no-op for matrix types that do not provide it */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3085 
3086 /*@C
3087    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3088    (block compressed row).  For good matrix assembly performance
3089    the user should preallocate the matrix storage by setting the parameters
3090    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3091    performance can be increased by more than a factor of 50.
3092 
3093    Collective
3094 
3095    Input Parameters:
3096 +  comm - MPI communicator
3097 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3098           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3099 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3100            This value should be the same as the local size used in creating the
3101            y vector for the matrix-vector product y = Ax.
3102 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3103            This value should be the same as the local size used in creating the
3104            x vector for the matrix-vector product y = Ax.
3105 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3106 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3107 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3108            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3113 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3114            submatrix (same for all local rows).
3115 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3116            off-diagonal portion of the local submatrix (possibly different for
3117            each block row) or NULL.
3118 
3119    Output Parameter:
3120 .  A - the matrix
3121 
3122    Options Database Keys:
3123 +   -mat_block_size - size of the blocks to use
3124 -   -mat_use_hash_table <fact>
3125 
3126    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3127    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3128    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3129 
3130    Notes:
3131    If the *_nnz parameter is given then the *_nz parameter is ignored
3132 
   A nonzero block is any block that has 1 or more nonzeros in it
3134 
3135    The user MUST specify either the local or global matrix dimensions
3136    (possibly both).
3137 
3138    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3140 
3141    Storage Information:
3142    For a square global matrix we define each processor's diagonal portion
3143    to be its local rows and the corresponding columns (a square submatrix);
3144    each processor's off-diagonal portion encompasses the remainder of the
3145    local matrix (a rectangular submatrix).
3146 
3147    The user can specify preallocated storage for the diagonal part of
3148    the local submatrix with either d_nz or d_nnz (not both).  Set
3149    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3150    memory allocation.  Likewise, specify preallocated storage for the
3151    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3152 
3153    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3154    the figure below we depict these three local rows and all columns (0-11).
3155 
3156 .vb
3157            0 1 2 3 4 5 6 7 8 9 10 11
3158           --------------------------
3159    row 3  |o o o d d d o o o o  o  o
3160    row 4  |o o o d d d o o o o  o  o
3161    row 5  |o o o d d d o o o o  o  o
3162           --------------------------
3163 .ve
3164 
3165    Thus, any entries in the d locations are stored in the d (diagonal)
3166    submatrix, and any entries in the o locations are stored in the
3167    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3168    stored simply in the MATSEQBAIJ format for compressed row storage.
3169 
3170    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3171    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3172    In general, for PDE problems in which most nonzeros are near the diagonal,
3173    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3174    or you will get TERRIBLE performance; see the users' manual chapter on
3175    matrices.
3176 
3177    Level: intermediate
3178 
3179 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3180 @*/
3181 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3182 {
3183   PetscErrorCode ierr;
3184   PetscMPIInt    size;
3185 
3186   PetscFunctionBegin;
3187   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3188   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3189   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3190   if (size > 1) {
3191     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3192     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3193   } else {
3194     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3195     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3196   }
3197   PetscFunctionReturn(0);
3198 }
3199 
/* Duplicate an MPIBAIJ matrix: creates a new Mat of the same type/sizes and copies
   the parallel bookkeeping; the diagonal (A) and off-diagonal (B) blocks are
   duplicated with MatDuplicate() honoring cpvalues. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* the duplicate is created fully assembled, ready for use */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* a is the new matrix's private data (allocated by MatSetType above) */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share layouts by reference rather than rebuilding them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* per-rank state copied verbatim; per-call caches (rowindices/rowvalues/barray)
     are reset because they are lazily rebuilt on demand */
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag/factor) but not the table
     itself or its statistics; the table is rebuilt when needed */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* garray maps local off-diagonal block columns to global block columns;
     len is the number of nonzero block columns of the off-diagonal part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  /* fresh stash, and copies of the local work vector and communication scatter */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* duplicate the sequential diagonal/off-diagonal blocks, honoring cpvalues */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3282 
3283 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3284 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3285 {
3286   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3287   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3288   PetscScalar    *matvals;
3289   PetscErrorCode ierr;
3290 
3291   PetscFunctionBegin;
3292   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3293 
3294   /* read in matrix header */
3295   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3296   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3297   M  = header[1]; N = header[2]; nz = header[3];
3298   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3299   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3300   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3301 
3302   /* set block sizes from the viewer's .info file */
3303   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3304   /* set local sizes if not set already */
3305   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3306   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3307   /* set global sizes if not set already */
3308   if (mat->rmap->N < 0) mat->rmap->N = M;
3309   if (mat->cmap->N < 0) mat->cmap->N = N;
3310   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3311   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3312 
3313   /* check if the matrix sizes are correct */
3314   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3315   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3316   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3317   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3318   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);
3319   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);
3320   mbs = m/bs; nbs = n/bs;
3321 
3322   /* read in row lengths and build row indices */
3323   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3324   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3325   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3326   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3327   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3328 
3329   /* read in column indices and matrix values */
3330   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3331   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3332   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3333 
3334   { /* preallocate matrix storage */
3335     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3336     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3337     PetscBool  sbaij,done;
3338     PetscInt   *d_nnz,*o_nnz;
3339 
3340     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3341     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3342     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3343     ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
3344     for (i=0; i<mbs; i++) {
3345       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3346       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3347       for (k=0; k<bs; k++) {
3348         PetscInt row = bs*i + k;
3349         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3350           PetscInt col = colidxs[j];
3351           if (!sbaij || col >= row) {
3352             if (col >= cs && col < ce) {
3353               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3354             } else {
3355               ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
3356               if (done) o_nnz[i]++;
3357             }
3358           }
3359         }
3360       }
3361     }
3362     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3363     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3364     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3365     ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3366     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3367   }
3368 
3369   /* store matrix values */
3370   for (i=0; i<m; i++) {
3371     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3372     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3373   }
3374 
3375   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3376   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3377   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3378   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3379   PetscFunctionReturn(0);
3380 }
3381 
3382 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3383 {
3384   PetscErrorCode ierr;
3385   PetscBool      isbinary;
3386 
3387   PetscFunctionBegin;
3388   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3389   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3390   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3391   PetscFunctionReturn(0);
3392 }
3393 
3394 /*@
3395    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3396 
3397    Input Parameters:
3398 +  mat  - the matrix
3399 -  fact - factor
3400 
3401    Not Collective, each process can use a different factor
3402 
3403    Level: advanced
3404 
3405   Notes:
3406    This can also be set by the command line option: -mat_use_hash_table <fact>
3407 
3408 .seealso: MatSetOption()
3409 @*/
3410 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3411 {
3412   PetscErrorCode ierr;
3413 
3414   PetscFunctionBegin;
3415   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3416   PetscFunctionReturn(0);
3417 }
3418 
3419 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3420 {
3421   Mat_MPIBAIJ *baij;
3422 
3423   PetscFunctionBegin;
3424   baij          = (Mat_MPIBAIJ*)mat->data;
3425   baij->ht_fact = fact;
3426   PetscFunctionReturn(0);
3427 }
3428 
3429 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3430 {
3431   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3432   PetscBool      flg;
3433   PetscErrorCode ierr;
3434 
3435   PetscFunctionBegin;
3436   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3437   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3438   if (Ad)     *Ad     = a->A;
3439   if (Ao)     *Ao     = a->B;
3440   if (colmap) *colmap = a->garray;
3441   PetscFunctionReturn(0);
3442 }
3443 
3444 /*
3445     Special version for direct calls from Fortran (to eliminate two function call overheads
3446 */
3447 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3448 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3449 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3450 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3451 #endif
3452 
3453 /*@C
3454   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3455 
3456   Collective on Mat
3457 
3458   Input Parameters:
3459 + mat - the matrix
3460 . min - number of input rows
3461 . im - input rows
3462 . nin - number of input columns
3463 . in - input columns
3464 . v - numerical values input
3465 - addvin - INSERT_VALUES or ADD_VALUES
3466 
3467   Notes:
3468     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3469 
3470   Level: advanced
3471 
3472 .seealso:   MatSetValuesBlocked()
3473 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);


  /* lazily allocate the bs x bs scratch block used when a block must be copied out of v */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride in v between consecutive rows of a block (row-oriented input)
     or consecutive columns of a block (column-oriented input) */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block-row index means skip this row */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* block row owned by this process */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the (i,j) block of v into the contiguous scratch buffer */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column lies in the diagonal portion */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue; /* negative block-column index means skip */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* off-diagonal portion */
          if (mat->was_assembled) {
            /* after a previous assembly, global block columns must be translated
               to local off-diagonal columns through colmap */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* a new off-process block column appeared after assembly:
                 disassemble and fall back to the global block-column index */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3591 
3592 /*@
3593      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3594          CSR format the local rows.
3595 
3596    Collective
3597 
3598    Input Parameters:
3599 +  comm - MPI communicator
3600 .  bs - the block size, only a block size of 1 is supported
3601 .  m - number of local rows (Cannot be PETSC_DECIDE)
3602 .  n - This value should be the same as the local size used in creating the
3603        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3604        calculated if N is given) For square matrices n is almost always m.
3605 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3606 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3607 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3608 .   j - column indices
3609 -   a - matrix values
3610 
3611    Output Parameter:
3612 .   mat - the matrix
3613 
3614    Level: intermediate
3615 
3616    Notes:
3617        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3618      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3619      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3620 
3621      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3622      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3623      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3624      with column-major ordering within blocks.
3625 
3626        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3627 
3628 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3629           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3630 @*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* The CSR row-pointer array must start at 0 (0-based indexing is required) */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  /* The local row count must be given explicitly; PETSC_DECIDE (-1) is not supported here */
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  /* The input blocks are column-major (Fortran-style values(bs,bs,nnz)), so switch to
     column-oriented insertion while copying the arrays, then restore the default */
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3648 
3649 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3650 {
3651   PetscErrorCode ierr;
3652   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3653   PetscInt       *indx;
3654   PetscScalar    *values;
3655 
3656   PetscFunctionBegin;
3657   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3658   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3659     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3660     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3661     PetscInt       *bindx,rmax=a->rmax,j;
3662     PetscMPIInt    rank,size;
3663 
3664     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3665     mbs = m/bs; Nbs = N/cbs;
3666     if (n == PETSC_DECIDE) {
3667       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3668     }
3669     nbs = n/cbs;
3670 
3671     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3672     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3673 
3674     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3675     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3676     if (rank == size-1) {
3677       /* Check sum(nbs) = Nbs */
3678       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3679     }
3680 
3681     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3682     for (i=0; i<mbs; i++) {
3683       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3684       nnz = nnz/bs;
3685       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3686       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3687       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3688     }
3689     ierr = PetscFree(bindx);CHKERRQ(ierr);
3690 
3691     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3692     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3693     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3694     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3695     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3696     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3697     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3698   }
3699 
3700   /* numeric phase */
3701   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3702   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3703 
3704   for (i=0; i<m; i++) {
3705     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3706     Ii   = i + rstart;
3707     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3708     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3709   }
3710   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3712   PetscFunctionReturn(0);
3713 }
3714