xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 76be6f4ff3bd4e251c19fc00ebbebfd58b6e7589)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
15   PetscScalar       *va,*vv;
16   Vec               vB,vA;
17   const PetscScalar *vb;
18 
19   PetscFunctionBegin;
20   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
21   PetscCall(MatGetRowMaxAbs(a->A,vA,idx));
22 
23   PetscCall(VecGetArrayWrite(vA,&va));
24   if (idx) {
25     for (i=0; i<m; i++) {
26       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
27     }
28   }
29 
30   PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
31   PetscCall(PetscMalloc1(m,&idxb));
32   PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));
33 
34   PetscCall(VecGetArrayWrite(v,&vv));
35   PetscCall(VecGetArrayRead(vB,&vb));
36   for (i=0; i<m; i++) {
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
38       vv[i] = vb[i];
39       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
40     } else {
41       vv[i] = va[i];
42       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
43         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
44     }
45   }
46   PetscCall(VecRestoreArrayWrite(vA,&vv));
47   PetscCall(VecRestoreArrayWrite(vA,&va));
48   PetscCall(VecRestoreArrayRead(vB,&vb));
49   PetscCall(PetscFree(idxb));
50   PetscCall(VecDestroy(&vA));
51   PetscCall(VecDestroy(&vB));
52   PetscFunctionReturn(0);
53 }
54 
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58 
59   PetscFunctionBegin;
60   PetscCall(MatStoreValues(aij->A));
61   PetscCall(MatStoreValues(aij->B));
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
68 
69   PetscFunctionBegin;
70   PetscCall(MatRetrieveValues(aij->A));
71   PetscCall(MatRetrieveValues(aij->B));
72   PetscFunctionReturn(0);
73 }
74 
75 /*
76      Local utility routine that creates a mapping from the global column
77    number to the local number in the off-diagonal part of the local
78    storage of the matrix.  This is done in a non scalable way since the
79    length of colmap equals the global matrix length.
80 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;  /* off-diagonal block; owns garray */
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table mapping global block column + 1 -> local point offset + 1
     (the +1 shifts avoid 0, which PetscTable reserves for "not found") */
  PetscCall(PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap));
  for (i=0; i<nbs; i++) {
    PetscCall(PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES));
  }
#else
  /* dense variant: one entry per global block column (length Nbs+1, zeroed);
     0 means "block column not present on this process", otherwise local offset + 1 */
  PetscCall(PetscCalloc1(baij->Nbs+1,&baij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt)));
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
100 
/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)

   Insert or add one scalar `value` at point coordinates (row,col) of the
   diagonal SeqBAIJ block A. Locates the bs x bs block containing the entry
   by binary + linear search over the block row; allocates a new block (via
   MatSeqXAIJReallocateAIJ) if absent and a->nonew permits. (orow,ocol) are
   the original global indices, used only in error messages. Relies on the
   caller (MatSetValues_MPIBAIJ) declaring rp,ap,bap,brow,bcol,ridx,cidx,
   low,high,t,_i,N,nrow,rmax and the a* aliases.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
136 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)

   Same as MatSetValues_SeqBAIJ_A_Private() but operating on the off-diagonal
   SeqBAIJ block B (b* aliases: bj,ba,bimax,bilen,bi). `col` must already be
   mapped to B's local (compressed) column numbering by the caller.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)));\
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
172 
/*
   MatSetValues_MPIBAIJ - point-wise (non-blocked) insertion into an MPIBAIJ
   matrix. Locally owned rows are inserted directly into the diagonal (A) or
   off-diagonal (B) sequential block via the macros above; off-process rows
   are stashed for communication during assembly (unless donotstash is set).
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column falls in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
        /* NOTE(review): the `else` below pairs with the `if` inside the
           PetscCheck() expansion above; this only compiles while PetscCheck
           expands to a bare `if` (no do-while wrapper) -- confirm against the
           current petscerror.h before upgrading PETSc */
        else {
          /* column falls in the off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* brand-new off-diagonal block column: drop back to unassembled
                 state so B uses global column numbering again */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            /* NOTE(review): same bare-if PetscCheck/else pairing as above */
            else col += in[j]%bs;
          } else col = in[j]; /* not yet assembled: B uses global point columns */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash for AssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
254 
255 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
256 {
257   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
258   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
259   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
260   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
261   PetscBool         roworiented=a->roworiented;
262   const PetscScalar *value     = v;
263   MatScalar         *ap,*aa = a->a,*bap;
264 
265   PetscFunctionBegin;
266   rp   = aj + ai[row];
267   ap   = aa + bs2*ai[row];
268   rmax = imax[row];
269   nrow = ailen[row];
270   value = v;
271   low = 0;
272   high = nrow;
273   while (high-low > 7) {
274     t = (low+high)/2;
275     if (rp[t] > col) high = t;
276     else             low  = t;
277   }
278   for (i=low; i<high; i++) {
279     if (rp[i] > col) break;
280     if (rp[i] == col) {
281       bap = ap +  bs2*i;
282       if (roworiented) {
283         if (is == ADD_VALUES) {
284           for (ii=0; ii<bs; ii++) {
285             for (jj=ii; jj<bs2; jj+=bs) {
286               bap[jj] += *value++;
287             }
288           }
289         } else {
290           for (ii=0; ii<bs; ii++) {
291             for (jj=ii; jj<bs2; jj+=bs) {
292               bap[jj] = *value++;
293             }
294           }
295         }
296       } else {
297         if (is == ADD_VALUES) {
298           for (ii=0; ii<bs; ii++,value+=bs) {
299             for (jj=0; jj<bs; jj++) {
300               bap[jj] += value[jj];
301             }
302             bap += bs;
303           }
304         } else {
305           for (ii=0; ii<bs; ii++,value+=bs) {
306             for (jj=0; jj<bs; jj++) {
307               bap[jj]  = value[jj];
308             }
309             bap += bs;
310           }
311         }
312       }
313       goto noinsert2;
314     }
315   }
316   if (nonew == 1) goto noinsert2;
317   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
318   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
319   N = nrow++ - 1; high++;
320   /* shift up all the later entries in this row */
321   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
322   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
323   rp[i] = col;
324   bap   = ap +  bs2*i;
325   if (roworiented) {
326     for (ii=0; ii<bs; ii++) {
327       for (jj=ii; jj<bs2; jj+=bs) {
328         bap[jj] = *value++;
329       }
330     }
331   } else {
332     for (ii=0; ii<bs; ii++) {
333       for (jj=0; jj<bs; jj++) {
334         *bap++ = *value++;
335       }
336     }
337   }
338   noinsert2:;
339   ailen[row] = nrow;
340   PetscFunctionReturn(0);
341 }
342 
343 /*
344     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
345     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
346 */
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine

    im[]/in[] are BLOCK row/column indices; v holds m*n blocks of bs x bs
    values. Locally owned block rows go straight into the sequential blocks;
    off-process rows are stashed for assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;  /* scratch buffer for one bs x bs block */
  PetscBool         roworiented = baij->roworiented;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocate the per-matrix block scratch buffer */
    PetscCall(PetscMalloc1(bs2,&barray));
    baij->barray = barray;
  }

  /* stride between consecutive rows of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored by convention */
    PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        /* (barray then aliases into v; otherwise the block is gathered into the scratch buffer) */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal block */
          col  = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
        } else if (in[j] < 0) continue;
        else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
        /* NOTE(review): the `else` below pairs with the `if` inside the
           PetscCheck() expansion above; this only compiles while PetscCheck
           expands to a bare `if` (no do-while wrapper) -- verify on upgrade */
        else {
          /* block column in the off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
              PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
            col  = (col - 1)/bs; /* colmap stores point offsets + 1; convert to a block index */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* brand-new off-diagonal block column: drop back to unassembled state */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col  =  in[j];
            } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]);
          } else col = in[j]; /* not yet assembled: B uses global block columns */
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
438 
439 #define HASH_KEY 0.6180339887
440 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
441 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
442 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - point-wise insertion when the matrix uses the
   hash-table representation (see MatCreateHashTable_MPIBAIJ_Private): the
   block containing (row,col) is located by open-addressing (linear probing,
   wrapping at the table end) on key = blockrow*Nbs + blockcol + 1. Only
   pre-existing blocks can be written; a missing key is an error.
   Off-process rows are stashed as in the non-HT path.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;  /* probe statistics (debug only) */

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned point row */
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table: key encodes the (block row, block col) pair */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same probe as below, but counting comparisons for the statistics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probe forward from h1, wrapping around to the start */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        /* HD[idx] points at the column-major bs x bs block */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      /* off-process row: stash for assembly-time communication */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
507 
508 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
509 {
510   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
511   PetscBool         roworiented = baij->roworiented;
512   PetscInt          i,j,ii,jj,row,col;
513   PetscInt          rstart=baij->rstartbs;
514   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
515   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
516   PetscReal         tmp;
517   MatScalar         **HD = baij->hd,*baij_a;
518   const PetscScalar *v_t,*value;
519   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
520 
521   PetscFunctionBegin;
522   if (roworiented) stepval = (n-1)*bs;
523   else stepval = (m-1)*bs;
524 
525   for (i=0; i<m; i++) {
526     if (PetscDefined(USE_DEBUG)) {
527       PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]);
528       PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
529     }
530     row = im[i];
531     v_t = v + i*nbs2;
532     if (row >= rstart && row < rend) {
533       for (j=0; j<n; j++) {
534         col = in[j];
535 
536         /* Look up into the Hash Table */
537         key = row*Nbs+col+1;
538         h1  = HASH(size,key,tmp);
539 
540         idx = h1;
541         if (PetscDefined(USE_DEBUG)) {
542           total_ct++;
543           insert_ct++;
544           if (HT[idx] != key) {
545             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
546             if (idx == size) {
547               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
548               PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
549             }
550           }
551         } else if (HT[idx] != key) {
552           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
553           if (idx == size) {
554             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
555             PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
556           }
557         }
558         baij_a = HD[idx];
559         if (roworiented) {
560           /*value = v + i*(stepval+bs)*bs + j*bs;*/
561           /* value = v + (i*(stepval+bs)+j)*bs; */
562           value = v_t;
563           v_t  += bs;
564           if (addv == ADD_VALUES) {
565             for (ii=0; ii<bs; ii++,value+=stepval) {
566               for (jj=ii; jj<bs2; jj+=bs) {
567                 baij_a[jj] += *value++;
568               }
569             }
570           } else {
571             for (ii=0; ii<bs; ii++,value+=stepval) {
572               for (jj=ii; jj<bs2; jj+=bs) {
573                 baij_a[jj] = *value++;
574               }
575             }
576           }
577         } else {
578           value = v + j*(stepval+bs)*bs + i*bs;
579           if (addv == ADD_VALUES) {
580             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
581               for (jj=0; jj<bs; jj++) {
582                 baij_a[jj] += *value++;
583               }
584             }
585           } else {
586             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
587               for (jj=0; jj<bs; jj++) {
588                 baij_a[jj] = *value++;
589               }
590             }
591           }
592         }
593       }
594     } else {
595       if (!baij->donotstash) {
596         if (roworiented) {
597           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
598         } else {
599           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
600         }
601       }
602     }
603   }
604   if (PetscDefined(USE_DEBUG)) {
605     baij->ht_total_ct  += total_ct;
606     baij->ht_insert_ct += insert_ct;
607   }
608   PetscFunctionReturn(0);
609 }
610 
/*
   MatGetValues_MPIBAIJ - retrieve individual point-indexed entries into v
   (row-major m x n). Only rows owned by this process are supported; requested
   entries that fall in a block column not stored locally are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* entry lies in the diagonal block */
          col  = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* entry lies in the off-diagonal block: map the global block column
             to B's local numbering via colmap (built lazily) */
          if (!baij->colmap) {
            PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
          data--; /* colmap stores offset+1; 0 ("not found") becomes -1 */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column absent locally => the entry is an unstored zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
650 
651 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
652 {
653   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
654   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
655   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
656   PetscReal      sum = 0.0;
657   MatScalar      *v;
658 
659   PetscFunctionBegin;
660   if (baij->size == 1) {
661     PetscCall(MatNorm(baij->A,type,nrm));
662   } else {
663     if (type == NORM_FROBENIUS) {
664       v  = amat->a;
665       nz = amat->nz*bs2;
666       for (i=0; i<nz; i++) {
667         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
668       }
669       v  = bmat->a;
670       nz = bmat->nz*bs2;
671       for (i=0; i<nz; i++) {
672         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
673       }
674       PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
675       *nrm = PetscSqrtReal(*nrm);
676     } else if (type == NORM_1) { /* max column sum */
677       PetscReal *tmp,*tmp2;
678       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
679       PetscCall(PetscCalloc1(mat->cmap->N,&tmp));
680       PetscCall(PetscMalloc1(mat->cmap->N,&tmp2));
681       v    = amat->a; jj = amat->j;
682       for (i=0; i<amat->nz; i++) {
683         for (j=0; j<bs; j++) {
684           col = bs*(cstart + *jj) + j; /* column index */
685           for (row=0; row<bs; row++) {
686             tmp[col] += PetscAbsScalar(*v);  v++;
687           }
688         }
689         jj++;
690       }
691       v = bmat->a; jj = bmat->j;
692       for (i=0; i<bmat->nz; i++) {
693         for (j=0; j<bs; j++) {
694           col = bs*garray[*jj] + j;
695           for (row=0; row<bs; row++) {
696             tmp[col] += PetscAbsScalar(*v); v++;
697           }
698         }
699         jj++;
700       }
701       PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
702       *nrm = 0.0;
703       for (j=0; j<mat->cmap->N; j++) {
704         if (tmp2[j] > *nrm) *nrm = tmp2[j];
705       }
706       PetscCall(PetscFree(tmp));
707       PetscCall(PetscFree(tmp2));
708     } else if (type == NORM_INFINITY) { /* max row sum */
709       PetscReal *sums;
710       PetscCall(PetscMalloc1(bs,&sums));
711       sum  = 0.0;
712       for (j=0; j<amat->mbs; j++) {
713         for (row=0; row<bs; row++) sums[row] = 0.0;
714         v  = amat->a + bs2*amat->i[j];
715         nz = amat->i[j+1]-amat->i[j];
716         for (i=0; i<nz; i++) {
717           for (col=0; col<bs; col++) {
718             for (row=0; row<bs; row++) {
719               sums[row] += PetscAbsScalar(*v); v++;
720             }
721           }
722         }
723         v  = bmat->a + bs2*bmat->i[j];
724         nz = bmat->i[j+1]-bmat->i[j];
725         for (i=0; i<nz; i++) {
726           for (col=0; col<bs; col++) {
727             for (row=0; row<bs; row++) {
728               sums[row] += PetscAbsScalar(*v); v++;
729             }
730           }
731         }
732         for (row=0; row<bs; row++) {
733           if (sums[row] > sum) sum = sums[row];
734         }
735       }
736       PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
737       PetscCall(PetscFree(sums));
738     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 /*
744   Creates the hash table, and sets the table
745   This table is created only once.
746   If new entried need to be added to the matrix
747   then the hash table has to be destroyed and
748   recreated.
749 */
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;   /* HT[i]: key (blockrow*Nbs+blockcol+1), 0 = empty slot */
  MatScalar      **HD;      /* HD[i]: pointer to the corresponding bs x bs value block */
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;      /* collision count and longest probe, for the summary */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0); /* already built */

  /* table size = factor * (total local nonzero blocks); factor > 1 keeps the
     load below 100% so linear probing terminates quickly */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht));
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  /* insert every block of the diagonal part, probing linearly (with wrap) from the hash slot */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      /* NOTE(review): if the probe loop completes without finding a free slot
         (table full), the block is silently dropped -- consider whether an
         error check after the loop is warranted */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  /* same insertion for the off-diagonal part; garray maps to global block columns */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max));
#endif
  PetscFunctionReturn(0);
}
833 
834 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
835 {
836   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
837   PetscInt       nstash,reallocs;
838 
839   PetscFunctionBegin;
840   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
841 
842   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
843   PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
844   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
845   PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
846   PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
847   PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
848   PetscFunctionReturn(0);
849 }
850 
/*
  Completes assembly: receives the stashed off-process entries started in
  MatAssemblyBegin_MPIBAIJ(), inserts them, assembles the local A and B parts,
  handles collective disassembly, builds the scatter for MatMult on first
  final assembly, and optionally installs the hash-table MatSetValues variants.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point-entry stash: each message holds (row,col,val) triples */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 scalars, hence val+i*bs2 */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    /* restore the original orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A,mode));
  PetscCall(MatAssemblyEnd(baij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is true only if every rank was assembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) {
      PetscCall(MatDisAssemble_MPIBAIJ(mat));
    }
  }

  /* first final assembly: build the column map and communication scatter for MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  }
  PetscCall(MatAssemblyBegin(baij->B,mode));
  PetscCall(MatAssemblyEnd(baij->B,mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if hash-table insertion was requested, build the table now and switch the
     MatSetValues implementations to the hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space used by MatGetRow is invalidated by assembly */
  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
963 
964 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
965 #include <petscdraw.h>
/*
  Views an MPIBAIJ matrix with an ASCII, draw, or socket viewer. For the INFO
  formats only summary data is printed; otherwise the whole matrix is gathered
  onto rank 0 (via a temporary MPIBAIJ matrix with all rows on rank 0) and the
  sequential viewer is used there. Collective on the matrix's communicator.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary: local sizes, nonzeros of diagonal/off-diagonal parts */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",
                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
      PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    PetscCall(MatSetType(A,MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
    PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    PetscCall(PetscMalloc1(bs,&rvals));

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one bs-long column of the block at a time (storage is column oriented) */
          PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps the compressed off-diagonal column index to a global block column */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++; a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1082 
/* Used for both MPIBAIJ and MPISBAIJ matrices.
   Writes the matrix in PETSc binary format: header, per-row lengths, column
   indices, then values, with each rank contributing its local rows. Within
   every point row the columns are emitted in globally increasing order by
   splitting the off-diagonal part around the diagonal part. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz); /* local count of point (scalar) nonzeros */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* rank 0 receives the global nonzero total into header[3] */
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      /* all bs point rows of a block row have the same length */
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      /* off-diagonal columns that come before the diagonal block range */
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      /* diagonal-part columns (local indices shifted by the column start cs) */
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs;
      /* remaining off-diagonal columns after the diagonal block range */
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values (same traversal order as the indices;
     blocks are stored column oriented, hence the bs*(bs*jb + l) + k indexing) */
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat,viewer));
  PetscFunctionReturn(0);
}
1164 
1165 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1166 {
1167   PetscBool      iascii,isdraw,issocket,isbinary;
1168 
1169   PetscFunctionBegin;
1170   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1171   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1172   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
1173   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1174   if (iascii || isdraw || issocket) {
1175     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer));
1176   } else if (isbinary) {
1177     PetscCall(MatView_MPIBAIJ_Binary(mat,viewer));
1178   }
1179   PetscFunctionReturn(0);
1180 }
1181 
/*
  Destroys an MPIBAIJ matrix: frees the stashes, the sequential A and B parts,
  the column map, communication objects, and all scratch arrays, then detaches
  every composed method so the object can be retyped cleanly.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* the local-to-global column map is a PetscTable or a plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); /* MatGetRow scratch */
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd,baij->ht)); /* hash table for fast MatSetValues */
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* detach the type name and all composed methods registered at creation */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL));
  PetscFunctionReturn(0);
}
1223 
1224 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1225 {
1226   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1227   PetscInt       nt;
1228 
1229   PetscFunctionBegin;
1230   PetscCall(VecGetLocalSize(xx,&nt));
1231   PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1232   PetscCall(VecGetLocalSize(yy,&nt));
1233   PetscCheck(nt == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1234   PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1235   PetscCall((*a->A->ops->mult)(a->A,xx,yy));
1236   PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1237   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1242 {
1243   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1244 
1245   PetscFunctionBegin;
1246   PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1247   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
1248   PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
1249   PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1254 {
1255   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1256 
1257   PetscFunctionBegin;
1258   /* do nondiagonal part */
1259   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1260   /* do local part */
1261   PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
1262   /* add partial results together */
1263   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1264   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1269 {
1270   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1271 
1272   PetscFunctionBegin;
1273   /* do nondiagonal part */
1274   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
1275   /* do local part */
1276   PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
1277   /* add partial results together */
1278   PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1279   PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 /*
1284   This only works correctly for square matrices where the subblock A->A is the
1285    diagonal block
1286 */
1287 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1288 {
1289   PetscFunctionBegin;
1290   PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1291   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ*)A->data)->A,v));
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1296 {
1297   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1298 
1299   PetscFunctionBegin;
1300   PetscCall(MatScale(a->A,aa));
1301   PetscCall(MatScale(a->B,aa));
1302   PetscFunctionReturn(0);
1303 }
1304 
/*
  Returns one (point) row of the matrix with global column indices, merging the
  diagonal (A) and off-diagonal (B) parts so the columns come out in globally
  increasing order: B-columns left of the diagonal range, then A-columns, then
  the remaining B-columns. Only locally owned rows may be requested, and only
  one row may be active at a time (see MatRestoreRow_MPIBAIJ).
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt    bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt    nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt    *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
  }
  lrow = row - brstart; /* local point row index */

  /* request only the pieces (values/indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column precedes the diagonal range */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested, so find imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
1380 
1381 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1382 {
1383   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1384 
1385   PetscFunctionBegin;
1386   PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1387   baij->getrowactive = PETSC_FALSE;
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1392 {
1393   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1394 
1395   PetscFunctionBegin;
1396   PetscCall(MatZeroEntries(l->A));
1397   PetscCall(MatZeroEntries(l->B));
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1402 {
1403   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1404   Mat            A  = a->A,B = a->B;
1405   PetscLogDouble isend[5],irecv[5];
1406 
1407   PetscFunctionBegin;
1408   info->block_size = (PetscReal)matin->rmap->bs;
1409 
1410   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1411 
1412   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1413   isend[3] = info->memory;  isend[4] = info->mallocs;
1414 
1415   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1416 
1417   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1418   isend[3] += info->memory;  isend[4] += info->mallocs;
1419 
1420   if (flag == MAT_LOCAL) {
1421     info->nz_used      = isend[0];
1422     info->nz_allocated = isend[1];
1423     info->nz_unneeded  = isend[2];
1424     info->memory       = isend[3];
1425     info->mallocs      = isend[4];
1426   } else if (flag == MAT_GLOBAL_MAX) {
1427     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1428 
1429     info->nz_used      = irecv[0];
1430     info->nz_allocated = irecv[1];
1431     info->nz_unneeded  = irecv[2];
1432     info->memory       = irecv[3];
1433     info->mallocs      = irecv[4];
1434   } else if (flag == MAT_GLOBAL_SUM) {
1435     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1436 
1437     info->nz_used      = irecv[0];
1438     info->nz_allocated = irecv[1];
1439     info->nz_unneeded  = irecv[2];
1440     info->memory       = irecv[3];
1441     info->mallocs      = irecv[4];
1442   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1443   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1444   info->fill_ratio_needed = 0;
1445   info->factor_mallocs    = 0;
1446   PetscFunctionReturn(0);
1447 }
1448 
/*
  Sets an option on the parallel matrix, forwarding to the sequential A and/or
  B blocks where the option is block-local, and recording parallel-only state
  (stashing, hash table) in the MPIBAIJ context.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that apply to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* remember the orientation locally too; MatSetValues paths consult it */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process entries are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    /* enables the hash-table MatSetValues variants; the factor oversizes the table */
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  /* symmetry-type options only affect the diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1495 
/*
  Computes the transpose by inserting every block column-by-column with rows
  and columns swapped, relying on MatSetValues to route entries to their new
  owners. For MAT_INITIAL_MATRIX/MAT_INPLACE_MATRIX a fresh matrix is built
  (with no preallocation information); for MAT_REUSE_MATRIX the caller's
  matrix is filled. MAT_INPLACE_MATRIX merges the result back into A.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* transposed sizes: local row/column dimensions are swapped */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  PetscCall(PetscMalloc1(bs,&rvals));

  for (i=0; i<mbs; i++) {
    /* rvals holds the bs consecutive global point rows of block row i */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert one column of the block as a row of B (transposed) */
        PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* garray maps the compressed off-diagonal column index to a global block column */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  /* for in-place transpose, fold B's contents back into A's header */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
1558 
/*
  Computes mat = diag(ll) * mat * diag(rr) (either vector may be NULL).
  Left scaling is purely local. Right scaling of the off-diagonal block needs
  the ghost entries of rr, so that scatter is started first and completed only
  after the local scaling work, overlapping communication with computation.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling of the off-diagonal block uses only local entries of ll */
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale  the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
  }
  PetscFunctionReturn(0);
}
1588 
/*
   Zero the global rows listed in rows[] of this parallel matrix.  When diag
   is nonzero it is placed on the diagonal of each zeroed row; when both x
   and b are given, b is adjusted so the solution at those rows equals the
   corresponding entries of x.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
  PetscInt      *lrows;   /* local indices of the rows owned by this process */
  PetscInt       r, len;
  PetscBool      cong;    /* PETSC_TRUE when row and column layouts are congruent */

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  /* fix right hand side if needed: b[row] = diag * x[row] for each zeroed row */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x,&xx));
    PetscCall(VecGetArray(b,&bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x,&xx));
    PetscCall(VecRestoreArray(b,&bb));
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  if ((diag != 0.0) && cong) {
    /* fast path: the diagonal entry lives in the square diagonal block */
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert diagonal entries via MatSetValues() */
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL));
    PetscCheck(!((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart; /* back to global numbering */
      PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES));
    }
    /* assembly needed because MatSetValues() may have inserted off-process entries */
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1646 
/*
   Zero the listed global rows AND the matching columns.  Rows may be
   supplied by any process; a star forest reduction routes each row index
   to its owner.  The diagonal block is handled by the sequential
   MatZeroRowsColumns(); the off-diagonal block is edited in place on the
   raw BAIJ storage.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;       /* 1.0 marks a zeroed row/column; lmask is the ghosted copy */
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;   /* -1 = row not flagged for zeroing */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;   /* flag zeroed rows so ghosts learn which columns die */
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost the solution values so eliminated columns can be folded into b */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;  /* number of scalar entries in this row */
    /* start at this row's offset inside the first bs x bs block, then hop block columns (stride bs) */
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;   /* block row of scalar row i */
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;   /* scalar column in the ghost (lvec) numbering */
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];   /* move known value to the rhs */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
1741 
/* Reset the factored state; only the local diagonal block is reset here. */
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}
1750 
1751 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1752 
1753 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1754 {
1755   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1756   Mat            a,b,c,d;
1757   PetscBool      flg;
1758 
1759   PetscFunctionBegin;
1760   a = matA->A; b = matA->B;
1761   c = matB->A; d = matB->B;
1762 
1763   PetscCall(MatEqual(a,c,&flg));
1764   if (flg) {
1765     PetscCall(MatEqual(b,d,&flg));
1766   }
1767   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1772 {
1773   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1774   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1775 
1776   PetscFunctionBegin;
1777   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1778   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1779     PetscCall(MatCopy_Basic(A,B,str));
1780   } else {
1781     PetscCall(MatCopy(a->A,b->A,str));
1782     PetscCall(MatCopy(a->B,b->B,str));
1783   }
1784   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1785   PetscFunctionReturn(0);
1786 }
1787 
/* Default setup: preallocate with the row-layout block size and default nonzero estimates. */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
1794 
1795 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1796 {
1797   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1798   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1799   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1800 
1801   PetscFunctionBegin;
1802   PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
1803   PetscFunctionReturn(0);
1804 }
1805 
/*
   Y = a*X + Y.  With SAME_NONZERO_PATTERN the stored value arrays align
   entry-for-entry, so the update reduces to two BLAS axpy calls (diagonal
   and off-diagonal blocks).  With SUBSET_NONZERO_PATTERN the generic
   routine handles it.  Otherwise a new matrix preallocated for the merged
   pattern is filled and header-merged back into Y.
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;   /* scalars per bs x bs block */

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: y->a += alpha * x->a over all stored scalars */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    /* off-diagonal blocks */
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    /* unrelated patterns: build a matrix preallocated for the union pattern */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
    PetscCall(MatSetType(B,MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    PetscCall(MatHeaderMerge(Y,&B));   /* B's guts replace Y's; Y's object identity is kept */
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
1848 
/* Complex-conjugate every stored entry; a no-op in real builds. */
PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data;

    /* conjugate both the diagonal and off-diagonal local blocks */
    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(0);
}
1860 
/* Replace every stored entry by its real part, in both local blocks. */
PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}
1870 
/* Replace every stored entry by its imaginary part, in both local blocks. */
PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}
1880 
1881 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1882 {
1883   IS             iscol_local;
1884   PetscInt       csize;
1885 
1886   PetscFunctionBegin;
1887   PetscCall(ISGetLocalSize(iscol,&csize));
1888   if (call == MAT_REUSE_MATRIX) {
1889     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
1890     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1891   } else {
1892     PetscCall(ISAllGather(iscol,&iscol_local));
1893   }
1894   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
1895   if (call == MAT_INITIAL_MATRIX) {
1896     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
1897     PetscCall(ISDestroy(&iscol_local));
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 /*
1903   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1904   in local and then by concatenating the local matrices the end result.
1905   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1906   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1907 */
/*
   Extract the submatrix mat[isrow,iscol] in parallel.  isrow/iscol are in
   scalar (not block) numbering; csize is the requested local column size
   of the result (PETSC_DECIDE to let PETSc choose).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;   /* M: result; Mreuse: local SeqBAIJ copy of the submatrix */
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;   /* block-compressed versions of the index sets */
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  /* The compression and expansion should be avoided.  It does not flag
     errors and may change the indices, hence buggy. */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new));

  if (call ==  MAT_REUSE_MATRIX) {
    /* the local copy from the initial call is stashed on *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat,&bs));
  PetscCall(MatGetSize(Mreuse,&m,&n));
  m    = m/bs;   /* convert scalar sizes to block sizes */
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible, extras on low ranks */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m+1,&dlens,m+1,&olens));
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;   /* column outside [rstart,rend) -> off-diagonal */
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    /* this routine serves BAIJ and SBAIJ; only the call matching M's type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens));
    PetscCall(MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens));
    PetscCall(PetscFree2(dlens,olens));
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  /* stream the local copy Mreuse into the parallel result, block row by block row */
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;   /* global block row */
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz*bs*bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(0);
}
2031 
2032 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2033 {
2034   MPI_Comm       comm,pcomm;
2035   PetscInt       clocal_size,nrows;
2036   const PetscInt *rows;
2037   PetscMPIInt    size;
2038   IS             crowp,lcolp;
2039 
2040   PetscFunctionBegin;
2041   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2042   /* make a collective version of 'rowp' */
2043   PetscCall(PetscObjectGetComm((PetscObject)rowp,&pcomm));
2044   if (pcomm==comm) {
2045     crowp = rowp;
2046   } else {
2047     PetscCall(ISGetSize(rowp,&nrows));
2048     PetscCall(ISGetIndices(rowp,&rows));
2049     PetscCall(ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp));
2050     PetscCall(ISRestoreIndices(rowp,&rows));
2051   }
2052   PetscCall(ISSetPermutation(crowp));
2053   /* make a local version of 'colp' */
2054   PetscCall(PetscObjectGetComm((PetscObject)colp,&pcomm));
2055   PetscCallMPI(MPI_Comm_size(pcomm,&size));
2056   if (size==1) {
2057     lcolp = colp;
2058   } else {
2059     PetscCall(ISAllGather(colp,&lcolp));
2060   }
2061   PetscCall(ISSetPermutation(lcolp));
2062   /* now we just get the submatrix */
2063   PetscCall(MatGetLocalSize(A,NULL,&clocal_size));
2064   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B));
2065   /* clean up */
2066   if (pcomm!=comm) {
2067     PetscCall(ISDestroy(&crowp));
2068   }
2069   if (size>1) {
2070     PetscCall(ISDestroy(&lcolp));
2071   }
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2076 {
2077   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2078   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2079 
2080   PetscFunctionBegin;
2081   if (nghosts) *nghosts = B->nbs;
2082   if (ghosts) *ghosts = baij->garray;
2083   PetscFunctionReturn(0);
2084 }
2085 
/*
   Build, on every process, a sequential AIJ matrix holding the full
   (block-level) nonzero structure of the parallel matrix A.  Numerical
   values are NOT set.  Two MPI_Allgatherv passes are used: one for the
   per-block-row lengths, one for the column indices.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  PetscCall(PetscMalloc1(A->rmap->N/bs,&lens));
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    /* block-row length = diagonal-part length + off-diagonal-part length */
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  PetscCall(PetscMalloc1(2*size,&recvcounts));
  displs    = recvcounts + size;   /* second half of the allocation doubles as displs */
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF,&B));
  PetscCall(MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(B,MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B,0,lens));
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];   /* write directly into our slice of B's j array */
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  PetscCheck(cnt == sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen,lens,A->rmap->N/bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  /* propagate symmetry information from A to the structure matrix */
  if (A->symmetric) {
    PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE));
  } else if (A->hermitian) {
    PetscCall(MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE));
  } else if (A->structurally_symmetric) {
    PetscCall(MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE));
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2194 
2195 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2196 {
2197   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2198   Vec            bb1 = NULL;
2199 
2200   PetscFunctionBegin;
2201   if (flag == SOR_APPLY_UPPER) {
2202     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2203     PetscFunctionReturn(0);
2204   }
2205 
2206   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2207     PetscCall(VecDuplicate(bb,&bb1));
2208   }
2209 
2210   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2211     if (flag & SOR_ZERO_INITIAL_GUESS) {
2212       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2213       its--;
2214     }
2215 
2216     while (its--) {
2217       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2218       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2219 
2220       /* update rhs: bb1 = bb - B*x */
2221       PetscCall(VecScale(mat->lvec,-1.0));
2222       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
2223 
2224       /* local sweep */
2225       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
2226     }
2227   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2228     if (flag & SOR_ZERO_INITIAL_GUESS) {
2229       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2230       its--;
2231     }
2232     while (its--) {
2233       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2234       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2235 
2236       /* update rhs: bb1 = bb - B*x */
2237       PetscCall(VecScale(mat->lvec,-1.0));
2238       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
2239 
2240       /* local sweep */
2241       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
2242     }
2243   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2244     if (flag & SOR_ZERO_INITIAL_GUESS) {
2245       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2246       its--;
2247     }
2248     while (its--) {
2249       PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2250       PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
2251 
2252       /* update rhs: bb1 = bb - B*x */
2253       PetscCall(VecScale(mat->lvec,-1.0));
2254       PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));
2255 
2256       /* local sweep */
2257       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
2258     }
2259   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2260 
2261   PetscCall(VecDestroy(&bb1));
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions)
2266 {
2267   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2268   PetscInt       m,N,i,*garray = aij->garray;
2269   PetscInt       ib,jb,bs = A->rmap->bs;
2270   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2271   MatScalar      *a_val = a_aij->a;
2272   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2273   MatScalar      *b_val = b_aij->a;
2274   PetscReal      *work;
2275 
2276   PetscFunctionBegin;
2277   PetscCall(MatGetSize(A,&m,&N));
2278   PetscCall(PetscCalloc1(N,&work));
2279   if (type == NORM_2) {
2280     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2281       for (jb=0; jb<bs; jb++) {
2282         for (ib=0; ib<bs; ib++) {
2283           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2284           a_val++;
2285         }
2286       }
2287     }
2288     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2289       for (jb=0; jb<bs; jb++) {
2290         for (ib=0; ib<bs; ib++) {
2291           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2292           b_val++;
2293         }
2294       }
2295     }
2296   } else if (type == NORM_1) {
2297     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2298       for (jb=0; jb<bs; jb++) {
2299         for (ib=0; ib<bs; ib++) {
2300           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2301           a_val++;
2302         }
2303       }
2304     }
2305     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2306       for (jb=0; jb<bs; jb++) {
2307        for (ib=0; ib<bs; ib++) {
2308           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2309           b_val++;
2310         }
2311       }
2312     }
2313   } else if (type == NORM_INFINITY) {
2314     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2315       for (jb=0; jb<bs; jb++) {
2316         for (ib=0; ib<bs; ib++) {
2317           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2318           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2319           a_val++;
2320         }
2321       }
2322     }
2323     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2324       for (jb=0; jb<bs; jb++) {
2325         for (ib=0; ib<bs; ib++) {
2326           int col = garray[b_aij->j[i]] * bs + jb;
2327           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2328           b_val++;
2329         }
2330       }
2331     }
2332   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2333     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2334       for (jb=0; jb<bs; jb++) {
2335         for (ib=0; ib<bs; ib++) {
2336           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2337           a_val++;
2338         }
2339       }
2340     }
2341     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2342       for (jb=0; jb<bs; jb++) {
2343        for (ib=0; ib<bs; ib++) {
2344           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2345           b_val++;
2346         }
2347       }
2348     }
2349   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2350     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2351       for (jb=0; jb<bs; jb++) {
2352         for (ib=0; ib<bs; ib++) {
2353           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2354           a_val++;
2355         }
2356       }
2357     }
2358     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2359       for (jb=0; jb<bs; jb++) {
2360        for (ib=0; ib<bs; ib++) {
2361           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2362           b_val++;
2363         }
2364       }
2365     }
2366   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
2367   if (type == NORM_INFINITY) {
2368     PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
2369   } else {
2370     PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
2371   }
2372   PetscCall(PetscFree(work));
2373   if (type == NORM_2) {
2374     for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2375   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2376     for (i=0; i<N; i++) reductions[i] /= m;
2377   }
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2382 {
2383   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2384 
2385   PetscFunctionBegin;
2386   PetscCall(MatInvertBlockDiagonal(a->A,values));
2387   A->factorerrortype             = a->A->factorerrortype;
2388   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2389   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2394 {
2395   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2396   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2397 
2398   PetscFunctionBegin;
2399   if (!Y->preallocated) {
2400     PetscCall(MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
2401   } else if (!aij->nz) {
2402     PetscInt nonew = aij->nonew;
2403     PetscCall(MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
2404     aij->nonew = nonew;
2405   }
2406   PetscCall(MatShift_Basic(Y,a));
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2411 {
2412   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2413 
2414   PetscFunctionBegin;
2415   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2416   PetscCall(MatMissingDiagonal(a->A,missing,d));
2417   if (d) {
2418     PetscInt rstart;
2419     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
2420     *d += rstart/A->rmap->bs;
2421 
2422   }
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2427 {
2428   PetscFunctionBegin;
2429   *a = ((Mat_MPIBAIJ*)A->data)->A;
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 /* -------------------------------------------------------------------*/
/*
   Dispatch table for the MATMPIBAIJ type.  Each slot corresponds to an entry
   of the MatOperation enumeration (the numeric markers in the comments below
   give the slot index); a NULL slot means the operation is not implemented
   for this matrix type.  The ordering is fixed by struct _MatOps and must
   not be changed.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                /*104*/NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                /*119*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                /*124*/NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL
};
2583 
2584 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2585 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2586 
/*
   Implements MatMPIBAIJSetPreallocationCSR() for MATMPIBAIJ: preallocates the
   matrix from a local block-CSR structure (ii,jj), optionally inserts the
   numerical values V, and assembles the matrix.

   ii,jj use block indices: block row i owns entries jj[ii[i] .. ii[i+1]-1].
   When V is given it holds bs*bs scalars per block, whose layout depends on
   the MAT_ROW_ORIENTED option (see the manual page of the public routine).
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
  const PetscInt *JJ    =NULL;
  PetscScalar    *values=NULL;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
  /* all sizes below are counted in blocks, not scalar entries */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
  PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
  /* first pass: count, per block row, how many blocks fall in the diagonal (d)
     and off-diagonal (o) portions so the two sequential pieces can be preallocated */
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros so the nonzero pattern is still created */
    PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
  }
  /* second pass: insert the (block) entries row by row */
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all entries inserted above are local by construction, so temporarily
     disable the off-process stash communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* the CSR pattern is final: error on any later insertion outside it */
  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
2657 
2658 /*@C
2659    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2660 
2661    Collective
2662 
2663    Input Parameters:
2664 +  B - the matrix
2665 .  bs - the block size
2666 .  i - the indices into j for the start of each local row (starts with zero)
2667 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2668 -  v - optional values in the matrix
2669 
2670    Level: advanced
2671 
2672    Notes:
2673     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2674    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2675    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2676    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2677    block column and the second index is over columns within a block.
2678 
2679    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2680 
2681 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2682 @*/
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* bs is collective: it must be identical on every rank of B's communicator */
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatches to the type-specific implementation when one is composed on B
     (e.g. MatMPIBAIJSetPreallocationCSR_MPIBAIJ); silently a no-op otherwise */
  PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
  PetscFunctionReturn(0);
}
2692 
2693 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2694 {
2695   Mat_MPIBAIJ    *b;
2696   PetscInt       i;
2697   PetscMPIInt    size;
2698 
2699   PetscFunctionBegin;
2700   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
2701   PetscCall(PetscLayoutSetUp(B->rmap));
2702   PetscCall(PetscLayoutSetUp(B->cmap));
2703   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2704 
2705   if (d_nnz) {
2706     for (i=0; i<B->rmap->n/bs; i++) {
2707       PetscCheck(d_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]);
2708     }
2709   }
2710   if (o_nnz) {
2711     for (i=0; i<B->rmap->n/bs; i++) {
2712       PetscCheck(o_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]);
2713     }
2714   }
2715 
2716   b      = (Mat_MPIBAIJ*)B->data;
2717   b->bs2 = bs*bs;
2718   b->mbs = B->rmap->n/bs;
2719   b->nbs = B->cmap->n/bs;
2720   b->Mbs = B->rmap->N/bs;
2721   b->Nbs = B->cmap->N/bs;
2722 
2723   for (i=0; i<=b->size; i++) {
2724     b->rangebs[i] = B->rmap->range[i]/bs;
2725   }
2726   b->rstartbs = B->rmap->rstart/bs;
2727   b->rendbs   = B->rmap->rend/bs;
2728   b->cstartbs = B->cmap->rstart/bs;
2729   b->cendbs   = B->cmap->rend/bs;
2730 
2731 #if defined(PETSC_USE_CTABLE)
2732   PetscCall(PetscTableDestroy(&b->colmap));
2733 #else
2734   PetscCall(PetscFree(b->colmap));
2735 #endif
2736   PetscCall(PetscFree(b->garray));
2737   PetscCall(VecDestroy(&b->lvec));
2738   PetscCall(VecScatterDestroy(&b->Mvctx));
2739 
2740   /* Because the B will have been resized we simply destroy it and create a new one each time */
2741   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
2742   PetscCall(MatDestroy(&b->B));
2743   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
2744   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
2745   PetscCall(MatSetType(b->B,MATSEQBAIJ));
2746   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
2747 
2748   if (!B->preallocated) {
2749     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
2750     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
2751     PetscCall(MatSetType(b->A,MATSEQBAIJ));
2752     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
2753     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash));
2754   }
2755 
2756   PetscCall(MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz));
2757   PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz));
2758   B->preallocated  = PETSC_TRUE;
2759   B->was_assembled = PETSC_FALSE;
2760   B->assembled     = PETSC_FALSE;
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2765 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2766 
/*
   Converts a MATMPIBAIJ matrix to MATMPIADJ, the adjacency graph of its block
   structure.  Rows and columns are block indices; diagonal entries are dropped
   because MPIAdj stores a graph without self-edges.  The ii/jj arrays built
   here are handed over to (and eventually freed by) the MPIAdj matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: per-row counts combining the diagonal (d) and off-diagonal (o) pieces */
  PetscCall(PetscMalloc1(M+1,&ii));
  ii[0] = 0;
  for (i=0; i<M; i++) {
    PetscCheck((id[i+1] - id[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]);
    PetscCheck((io[i+1] - io[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* second pass: fill jj with global block columns in ascending order —
     off-diagonal entries left of this rank's block range, then diagonal-block
     entries (skipping the diagonal itself), then the remaining off-diagonal entries */
  PetscCall(PetscMalloc1(ii[M],&jj));
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  /* MPIAdj takes ownership of ii and jj (no values array) */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj));
  PetscFunctionReturn(0);
}
2805 
2806 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2807 
2808 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2809 
/*
   Converts a MATMPIBAIJ matrix to MATMPIAIJ by converting its two sequential
   pieces (diagonal a->A and off-diagonal a->B) individually.  For the
   non-reuse paths the BAIJ matrix is first disassembled (off-diagonal columns
   mapped back to global indices) and reassembled afterwards, so A is left in
   its original assembled state.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  Mat_MPIAIJ  *b;
  Mat         B;

  PetscFunctionBegin;
  PetscCheck(A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* fresh target matrix with matching layout; empty preallocation since the
       sequential pieces are replaced wholesale below */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetType(B,MATMPIAIJ));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
    PetscCall(MatMPIAIJSetPreallocation(B,0,NULL,0,NULL));
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* disassemble so a->B carries global column indices during the conversion */
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    /* restore A to its assembled (compressed off-diagonal) state */
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
  }
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&B));
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2853 
2854 /*MC
2855    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2856 
2857    Options Database Keys:
2858 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2859 . -mat_block_size <bs> - set the blocksize used to store the matrix
2860 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2861 - -mat_use_hash_table <fact> - set hash table factor
2862 
2863    Level: beginner
2864 
   Notes:
    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2868 
2869 .seealso: `MatCreateBAIJ`
2870 M*/
2871 
2872 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2873 
/*
   Constructor for the MATMPIBAIJ type: allocates the implementation data,
   installs the operations table, creates the stash used to hold off-process
   entries during assembly, initializes all cached structures to empty,
   registers the composed methods (type conversions, preallocation, etc.),
   and processes the -mat_use_hash_table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(B,&b));
  B->data = (void*)b;

  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size+1,&b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register type conversions and type-specific setters looked up by name */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
    /* factors <= 1.0 make no sense for an over-allocated hash table; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
    PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
2953 
2954 /*MC
2955    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2956 
2957    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2958    and MATMPIBAIJ otherwise.
2959 
2960    Options Database Keys:
2961 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2962 
2963   Level: beginner
2964 
2965 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2966 M*/
2967 
2968 /*@C
2969    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2970    (block compressed row).  For good matrix assembly performance
2971    the user should preallocate the matrix storage by setting the parameters
2972    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2973    performance can be increased by more than a factor of 50.
2974 
2975    Collective on Mat
2976 
2977    Input Parameters:
2978 +  B - the matrix
2979 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2980           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2981 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2982            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
2987 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2988            submatrix (same for all local rows).
2989 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2990            off-diagonal portion of the local submatrix (possibly different for
2991            each block row) or NULL.
2992 
2993    If the *_nnz parameter is given then the *_nz parameter is ignored
2994 
2995    Options Database Keys:
2996 +   -mat_block_size - size of the blocks to use
2997 -   -mat_use_hash_table <fact> - set hash table factor
2998 
2999    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3002 
3003    Storage Information:
3004    For a square global matrix we define each processor's diagonal portion
3005    to be its local rows and the corresponding columns (a square submatrix);
3006    each processor's off-diagonal portion encompasses the remainder of the
3007    local matrix (a rectangular submatrix).
3008 
3009    The user can specify preallocated storage for the diagonal part of
3010    the local submatrix with either d_nz or d_nnz (not both).  Set
3011    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3012    memory allocation.  Likewise, specify preallocated storage for the
3013    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3014 
3015    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3016    the figure below we depict these three local rows and all columns (0-11).
3017 
3018 .vb
3019            0 1 2 3 4 5 6 7 8 9 10 11
3020           --------------------------
3021    row 3  |o o o d d d o o o o  o  o
3022    row 4  |o o o d d d o o o o  o  o
3023    row 5  |o o o d d d o o o o  o  o
3024           --------------------------
3025 .ve
3026 
3027    Thus, any entries in the d locations are stored in the d (diagonal)
3028    submatrix, and any entries in the o locations are stored in the
3029    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3030    stored simply in the MATSEQBAIJ format for compressed row storage.
3031 
3032    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3033    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3034    In general, for PDE problems in which most nonzeros are near the diagonal,
3035    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3036    or you will get TERRIBLE performance; see the users' manual chapter on
3037    matrices.
3038 
3039    You can call MatGetInfo() to get information on how effective the preallocation was;
3040    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3041    You can also run with the option -info and look for messages with the string
3042    malloc in them to see if additional memory allocation was needed.
3043 
3044    Level: intermediate
3045 
3046 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3047 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate the matrix object and require bs to be the same on all ranks */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation if the matrix type registered
     "MatMPIBAIJSetPreallocation_C"; silently a no-op for other types */
  PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
3057 
3058 /*@C
3059    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3060    (block compressed row).  For good matrix assembly performance
3061    the user should preallocate the matrix storage by setting the parameters
3062    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3063    performance can be increased by more than a factor of 50.
3064 
3065    Collective
3066 
3067    Input Parameters:
3068 +  comm - MPI communicator
3069 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3070           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3071 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3072            This value should be the same as the local size used in creating the
3073            y vector for the matrix-vector product y = Ax.
3074 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3075            This value should be the same as the local size used in creating the
3076            x vector for the matrix-vector product y = Ax.
3077 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3078 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3079 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3080            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3085 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3086            submatrix (same for all local rows).
3087 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3088            off-diagonal portion of the local submatrix (possibly different for
3089            each block row) or NULL.
3090 
3091    Output Parameter:
3092 .  A - the matrix
3093 
3094    Options Database Keys:
3095 +   -mat_block_size - size of the blocks to use
3096 -   -mat_use_hash_table <fact> - set hash table factor
3097 
3098    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3099    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3100    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3101 
3102    Notes:
3103    If the *_nnz parameter is given then the *_nz parameter is ignored
3104 
   A nonzero block is any block that has 1 or more nonzeros in it
3106 
3107    The user MUST specify either the local or global matrix dimensions
3108    (possibly both).
3109 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3112 
3113    Storage Information:
3114    For a square global matrix we define each processor's diagonal portion
3115    to be its local rows and the corresponding columns (a square submatrix);
3116    each processor's off-diagonal portion encompasses the remainder of the
3117    local matrix (a rectangular submatrix).
3118 
3119    The user can specify preallocated storage for the diagonal part of
3120    the local submatrix with either d_nz or d_nnz (not both).  Set
3121    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3122    memory allocation.  Likewise, specify preallocated storage for the
3123    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3124 
3125    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3126    the figure below we depict these three local rows and all columns (0-11).
3127 
3128 .vb
3129            0 1 2 3 4 5 6 7 8 9 10 11
3130           --------------------------
3131    row 3  |o o o d d d o o o o  o  o
3132    row 4  |o o o d d d o o o o  o  o
3133    row 5  |o o o d d d o o o o  o  o
3134           --------------------------
3135 .ve
3136 
3137    Thus, any entries in the d locations are stored in the d (diagonal)
3138    submatrix, and any entries in the o locations are stored in the
3139    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3140    stored simply in the MATSEQBAIJ format for compressed row storage.
3141 
3142    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3143    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3144    In general, for PDE problems in which most nonzeros are near the diagonal,
3145    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3146    or you will get TERRIBLE performance; see the users' manual chapter on
3147    matrices.
3148 
3149    Level: intermediate
3150 
3151 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
3152 @*/
3153 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3154 {
3155   PetscMPIInt    size;
3156 
3157   PetscFunctionBegin;
3158   PetscCall(MatCreate(comm,A));
3159   PetscCall(MatSetSizes(*A,m,n,M,N));
3160   PetscCallMPI(MPI_Comm_size(comm,&size));
3161   if (size > 1) {
3162     PetscCall(MatSetType(*A,MATMPIBAIJ));
3163     PetscCall(MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
3164   } else {
3165     PetscCall(MatSetType(*A,MATSEQBAIJ));
3166     PetscCall(MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
3167   }
3168   PetscFunctionReturn(0);
3169 }
3170 
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = NULL;
  /* create a new matrix of the same type and parallel layout as matin */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));

  /* the duplicate is declared assembled/preallocated up front; the actual
     storage is filled in by the MatDuplicate() calls on A and B below */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-size/block-count metadata */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the row/column layouts by reference instead of copying them */
  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-object scratch state (MatGetRow buffers etc.) starts out empty */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: only the configuration (flag/factor) is copied;
     the table itself is rebuilt on demand */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1));
  /* duplicate the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs));
#endif
  } else a->colmap = NULL;

  /* duplicate garray (global block-column indices of the off-diagonal part) */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash));
  /* duplicate the communication structures used by matrix-vector products */
  PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));

  /* duplicate the local diagonal (A) and off-diagonal (B) pieces;
     cpvalues controls whether numerical values are copied */
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3252 
/* Used for both MPIBAIJ and MPISBAIJ matrices: reads a matrix in PETSc binary
   format from viewer into mat, doing a counting pass over the data to
   preallocate block storage, then inserting the values row by row */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
  PetscInt       *rowidxs,*colidxs,rs,cs,ce;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header: classid, global rows, global columns, total nonzeros */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M  = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set local sizes if not set already (for square matrices the two local
     sizes can be inferred from each other) */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
  PetscCall(MatGetBlockSize(mat,&bs));
  PetscCall(MatGetLocalSize(mat,&m,&n));
  PetscCall(PetscLayoutGetRange(mat->rmap,&rs,NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cs,&ce));
  mbs = m/bs; nbs = n/bs; /* local block-row and block-column counts */

  /* read in row lengths and build row indices (CSR-style prefix sum) */
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: local row lengths must sum to the file's nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);

  /* read in column indices and matrix values for the locally owned rows */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));

  { /* preallocate matrix storage by counting distinct block columns per block row */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij,done;
    PetscInt   *d_nnz,*o_nnz;

    PetscCall(PetscBTCreate(nbs,&bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij));
    for (i=0; i<mbs; i++) {
      PetscCall(PetscBTMemzero(nbs,bt));
      PetscCall(PetscHSetIClear(ht));
      for (k=0; k<bs; k++) {
        PetscInt row = bs*i + k;
        for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only entries at or above the diagonal are stored */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht,col/bs,&done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both setters are dispatched via PetscTryMethod, so only the one matching
       the actual matrix type has an effect */
    PetscCall(MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree2(d_nnz,o_nnz));
  }

  /* store matrix values via the type's setvalues implementation */
  for (i=0; i<m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
    PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
3350 
3351 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3352 {
3353   PetscBool isbinary;
3354 
3355   PetscFunctionBegin;
3356   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3357   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3358   PetscCall(MatLoad_MPIBAIJ_Binary(mat,viewer));
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 /*@
3363    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3364 
3365    Input Parameters:
3366 +  mat  - the matrix
3367 -  fact - factor
3368 
3369    Not Collective, each process can use a different factor
3370 
3371    Level: advanced
3372 
3373   Notes:
3374    This can also be set by the command line option: -mat_use_hash_table <fact>
3375 
3376 .seealso: `MatSetOption()`
3377 @*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscFunctionBegin;
  /* forward to the type-specific implementation; a no-op for matrix types
     that do not register "MatSetHashTableFactor_C" */
  PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));
  PetscFunctionReturn(0);
}
3384 
3385 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3386 {
3387   Mat_MPIBAIJ *baij;
3388 
3389   PetscFunctionBegin;
3390   baij          = (Mat_MPIBAIJ*)mat->data;
3391   baij->ht_fact = fact;
3392   PetscFunctionReturn(0);
3393 }
3394 
3395 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3396 {
3397   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3398   PetscBool    flg;
3399 
3400   PetscFunctionBegin;
3401   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg));
3402   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3403   if (Ad)     *Ad     = a->A;
3404   if (Ao)     *Ao     = a->B;
3405   if (colmap) *colmap = a->garray;
3406   PetscFunctionReturn(0);
3407 }
3408 
3409 /*
3410     Special version for direct calls from Fortran (to eliminate two function call overheads
3411 */
3412 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3413 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3414 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3415 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3416 #endif
3417 
3418 /*@C
3419   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3420 
3421   Collective on Mat
3422 
3423   Input Parameters:
3424 + mat - the matrix
3425 . min - number of input rows
3426 . im - input rows
3427 . nin - number of input columns
3428 . in - input columns
3429 . v - numerical values input
3430 - addvin - INSERT_VALUES or ADD_VALUES
3431 
3432   Notes:
3433     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3434 
3435   Level: advanced
3436 
3437 .seealso: `MatSetValuesBlocked()`
3438 @*/
3439 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3440 {
3441   /* convert input arguments to C version */
3442   Mat        mat  = *matin;
3443   PetscInt   m    = *min, n = *nin;
3444   InsertMode addv = *addvin;
3445 
3446   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3447   const MatScalar *value;
3448   MatScalar       *barray     = baij->barray;
3449   PetscBool       roworiented = baij->roworiented;
3450   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3451   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3452   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3453 
3454   PetscFunctionBegin;
3455   /* tasks normally handled by MatSetValuesBlocked() */
3456   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3457   else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3458   PetscCheck(!mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3459   if (mat->assembled) {
3460     mat->was_assembled = PETSC_TRUE;
3461     mat->assembled     = PETSC_FALSE;
3462   }
3463   PetscCall(PetscLogEventBegin(MAT_SetValues,mat,0,0,0));
3464 
3465   if (!barray) {
3466     PetscCall(PetscMalloc1(bs2,&barray));
3467     baij->barray = barray;
3468   }
3469 
3470   if (roworiented) stepval = (n-1)*bs;
3471   else stepval = (m-1)*bs;
3472 
3473   for (i=0; i<m; i++) {
3474     if (im[i] < 0) continue;
3475     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
3476     if (im[i] >= rstart && im[i] < rend) {
3477       row = im[i] - rstart;
3478       for (j=0; j<n; j++) {
3479         /* If NumCol = 1 then a copy is not required */
3480         if ((roworiented) && (n == 1)) {
3481           barray = (MatScalar*)v + i*bs2;
3482         } else if ((!roworiented) && (m == 1)) {
3483           barray = (MatScalar*)v + j*bs2;
3484         } else { /* Here a copy is required */
3485           if (roworiented) {
3486             value = v + i*(stepval+bs)*bs + j*bs;
3487           } else {
3488             value = v + j*(stepval+bs)*bs + i*bs;
3489           }
3490           for (ii=0; ii<bs; ii++,value+=stepval) {
3491             for (jj=0; jj<bs; jj++) {
3492               *barray++ = *value++;
3493             }
3494           }
3495           barray -=bs2;
3496         }
3497 
3498         if (in[j] >= cstart && in[j] < cend) {
3499           col  = in[j] - cstart;
3500           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
3501         } else if (in[j] < 0) continue;
3502         else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
3503         else {
3504           if (mat->was_assembled) {
3505             if (!baij->colmap) {
3506               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3507             }
3508 
3509 #if defined(PETSC_USE_DEBUG)
3510 #if defined(PETSC_USE_CTABLE)
3511             { PetscInt data;
3512               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
3513               PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3514             }
3515 #else
3516             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3517 #endif
3518 #endif
3519 #if defined(PETSC_USE_CTABLE)
3520             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
3521             col  = (col - 1)/bs;
3522 #else
3523             col = (baij->colmap[in[j]] - 1)/bs;
3524 #endif
3525             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3526               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3527               col  =  in[j];
3528             }
3529           } else col = in[j];
3530           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
3531         }
3532       }
3533     } else {
3534       if (!baij->donotstash) {
3535         if (roworiented) {
3536           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
3537         } else {
3538           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
3539         }
3540       }
3541     }
3542   }
3543 
3544   /* task normally handled by MatSetValuesBlocked() */
3545   PetscCall(PetscLogEventEnd(MAT_SetValues,mat,0,0,0));
3546   PetscFunctionReturn(0);
3547 }
3548 
3549 /*@
3550      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3551          CSR format the local rows.
3552 
3553    Collective
3554 
3555    Input Parameters:
3556 +  comm - MPI communicator
3557 .  bs - the block size, only a block size of 1 is supported
3558 .  m - number of local rows (Cannot be PETSC_DECIDE)
3559 .  n - This value should be the same as the local size used in creating the
3560        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3561        calculated if N is given) For square matrices n is almost always m.
3562 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3563 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3564 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3565 .   j - column indices
3566 -   a - matrix values
3567 
3568    Output Parameter:
3569 .   mat - the matrix
3570 
3571    Level: intermediate
3572 
3573    Notes:
3574        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3575      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3576      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3577 
3578      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3579      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3580      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3581      with column-major ordering within blocks.
3582 
3583        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3584 
3585 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3586           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3587 @*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  /* validate the CSR input before creating anything */
  PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat,bs));
  PetscCall(MatSetUp(*mat));
  /* blocks in a[] are column-major (see the manpage above), so switch to
     column-oriented insertion while the CSR data is copied in, then restore
     the default row orientation */
  PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a));
  PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE));
  PetscFunctionReturn(0);
}
3603 
3604 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3605 {
3606   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3607   PetscInt       *indx;
3608   PetscScalar    *values;
3609 
3610   PetscFunctionBegin;
3611   PetscCall(MatGetSize(inmat,&m,&N));
3612   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3613     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3614     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3615     PetscInt       *bindx,rmax=a->rmax,j;
3616     PetscMPIInt    rank,size;
3617 
3618     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
3619     mbs = m/bs; Nbs = N/cbs;
3620     if (n == PETSC_DECIDE) {
3621       PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
3622     }
3623     nbs = n/cbs;
3624 
3625     PetscCall(PetscMalloc1(rmax,&bindx));
3626     MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
3627 
3628     PetscCallMPI(MPI_Comm_rank(comm,&rank));
3629     PetscCallMPI(MPI_Comm_rank(comm,&size));
3630     if (rank == size-1) {
3631       /* Check sum(nbs) = Nbs */
3632       PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
3633     }
3634 
3635     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3636     for (i=0; i<mbs; i++) {
3637       PetscCall(MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
3638       nnz = nnz/bs;
3639       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3640       PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
3641       PetscCall(MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL));
3642     }
3643     PetscCall(PetscFree(bindx));
3644 
3645     PetscCall(MatCreate(comm,outmat));
3646     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
3647     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
3648     PetscCall(MatSetType(*outmat,MATBAIJ));
3649     PetscCall(MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz));
3650     PetscCall(MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
3651     MatPreallocateEnd(dnz,onz);
3652     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
3653   }
3654 
3655   /* numeric phase */
3656   PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
3657   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
3658 
3659   for (i=0; i<m; i++) {
3660     PetscCall(MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values));
3661     Ii   = i + rstart;
3662     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
3663     PetscCall(MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values));
3664   }
3665   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
3666   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
3667   PetscFunctionReturn(0);
3668 }
3669