xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 117ef88edefbfc12e7c19efe87a19a2e1b0acd4f)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 /* This could be moved to matimpl.h */
/*
   Preallocate the (already typed and sized) matrix B using the union of the
   nonzero patterns of the nm matrices X[].

   B must have the same row layout as every X[i] (checked below).  If symm[i]
   is PETSC_TRUE then X[i] stores only one triangle, so each entry (r,c) of
   X[i] also contributes (c,r) to the pattern of B; symm may be NULL.  fill
   is forwarded to MatPreallocatorPreallocate().

   Implementation: build a temporary MATPREALLOCATOR matrix of the same shape
   and block size, replay every entry of the X[] into it, then transfer the
   accumulated pattern to B and destroy the helper.
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat            preallocator;
  PetscInt       r,rstart,rend;
  PetscInt       bs,i,m,n,M,N;
  PetscBool      cong = PETSC_TRUE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(B,nm,2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
    ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B,fill,5);
  ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
  /* the preallocator mirrors B's shape/block size so ownership ranges match */
  ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
  ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
  ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
  ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetUp(preallocator);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
  for (r = rstart; r < rend; ++r) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
      if (symm && symm[i]) {
        /* X[i] stores one triangle only: also insert the transposed entries */
        ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* hand the collected pattern over to B and discard the helper matrix */
  ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
  ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
58 
/*
   Convert an MPISBAIJ matrix A (which stores only the upper triangle) to a
   full-storage matrix of type newtype by re-inserting every stored entry at
   both (r,c) and (c,r); for complex Hermitian A the mirrored entries are
   conjugated.  Honors the MAT_INITIAL_MATRIX / MAT_REUSE_MATRIX /
   MAT_INPLACE_MATRIX semantics of reuse.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,newtype);CHKERRQ(ierr);
    ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
    /* dense targets need no preallocation; others get the symmetrized pattern of A */
    ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
    if (!isdense) {
      ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
      ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
    } else {
      ierr = MatSetUp(B);CHKERRQ(ierr);
    }
  } else {
    B    = *newmat;
    ierr = MatZeroEntries(B);CHKERRQ(ierr);
  }

  /* copy the stored upper triangle and mirror it into the lower triangle */
  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian) {
      PetscInt i;
      for (i = 0; i < ncols; i++) {
        /* Hermitian: mirrored entry is the complex conjugate */
        ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
      }
    } else {
      ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
    }
#else
    ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
#endif
    ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's innards with B's, destroying the old A */
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
123 
124 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
125 {
126   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
131   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
136 {
137   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
138   PetscErrorCode ierr;
139 
140   PetscFunctionBegin;
141   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
142   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
/*
   MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol):
   insert or add the single scalar `value` at local point indices (row,col)
   of the diagonal (SeqSBAIJ) block A.  Used only inside
   MatSetValues_MPISBAIJ(), which declares the working variables referenced
   here (brow,bcol,rp,ap,bap,nrow,rmax,low,high,t,_i,N,ridx,cidx,ierr and the
   SBAIJ arrays a,A,aa,ai,aj,aimax,ailen plus bs,bs2).  orow/ocol are the
   original global indices, used only in the error message.
*/
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    /* binary search narrows [low,high) to a few candidate blocks */ \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already present: update the (ridx,cidx) entry in place */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    /* nonew==1: silently drop new nonzeros; nonew==-1: error on them */ \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }
182 
/*
   MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol):
   insert or add the single scalar `value` at local point indices (row,col)
   of the off-diagonal (SeqBAIJ) block B.  Mirror of the _A_ macro above but
   driven by the B-block arrays (b,B,ba,bi,bj,bimax,bilen) declared in
   MatSetValues_MPISBAIJ().  orow/ocol are the original global indices, used
   only in the error message.
*/
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    /* binary search narrows [low,high) to a few candidate blocks */ \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already present: update the (ridx,cidx) entry in place */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    /* nonew==1: silently drop new nonzeros; nonew==-1: error on them */ \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
219 
220 /* Only add/insert a(i,j) with i<=j (blocks).
221    Any a(i,j) with i>j input by user is ingored or generates an error
222 */
/*
   Point-wise MatSetValues() for MPISBAIJ.  Locally owned entries are written
   directly into the diagonal (A, SeqSBAIJ) or off-diagonal (B, SeqBAIJ)
   block via the macros above; rows owned by other processes are stashed for
   communication during assembly.  Entries whose block row exceeds their
   block column (lower triangle) are skipped when ignore_ltriangular is set,
   otherwise an error is raised.
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the scratch buffers used to filter a row before stashing it */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* map the global column into B's compressed local numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* column not seen before and new nonzeros allowed: revert B to
                 global column numbering before inserting */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
336 
/*
   Insert or add one bs x bs block of values v at block indices (row,col) of
   a sequential SBAIJ matrix (the diagonal block of an MPISBAIJ matrix).
   Since only the upper triangle is stored, blocks with col < row are either
   skipped (a->ignore_ltriangular) or rejected with an error.  v is read
   row-major when a->roworiented, column-major otherwise.  orow/ocol are the
   original global block indices, used only in error messages.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp   = aj + ai[row];     /* block-column indices of block row `row` */
  ap   = aa + bs2*ai[row]; /* block values of block row `row` */
  rmax = imax[row];        /* allocated block slots in this row */
  nrow = ailen[row];       /* used block slots in this row */
  value = v;               /* (redundant: value was already initialized to v above) */
  low   = 0;
  high  = nrow;

  /* binary search then linear scan for block column `col` */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block exists: update it in place */
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++  = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  /* nonew==1: silently drop new blocks; nonew==-1: error on them */
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap +  bs2*i;
  /* write the freshly inserted block */
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
428 
429 /*
430    This routine is exactly duplicated in mpibaij.c
431 */
432 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
433 {
434   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
435   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
436   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
437   PetscErrorCode    ierr;
438   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
439   PetscBool         roworiented=a->roworiented;
440   const PetscScalar *value     = v;
441   MatScalar         *ap,*aa = a->a,*bap;
442 
443   PetscFunctionBegin;
444   rp   = aj + ai[row];
445   ap   = aa + bs2*ai[row];
446   rmax = imax[row];
447   nrow = ailen[row];
448   low  = 0;
449   high = nrow;
450   value = v;
451   while (high-low > 7) {
452     t = (low+high)/2;
453     if (rp[t] > col) high = t;
454     else             low  = t;
455   }
456   for (i=low; i<high; i++) {
457     if (rp[i] > col) break;
458     if (rp[i] == col) {
459       bap = ap +  bs2*i;
460       if (roworiented) {
461         if (is == ADD_VALUES) {
462           for (ii=0; ii<bs; ii++) {
463             for (jj=ii; jj<bs2; jj+=bs) {
464               bap[jj] += *value++;
465             }
466           }
467         } else {
468           for (ii=0; ii<bs; ii++) {
469             for (jj=ii; jj<bs2; jj+=bs) {
470               bap[jj] = *value++;
471             }
472           }
473         }
474       } else {
475         if (is == ADD_VALUES) {
476           for (ii=0; ii<bs; ii++,value+=bs) {
477             for (jj=0; jj<bs; jj++) {
478               bap[jj] += value[jj];
479             }
480             bap += bs;
481           }
482         } else {
483           for (ii=0; ii<bs; ii++,value+=bs) {
484             for (jj=0; jj<bs; jj++) {
485               bap[jj]  = value[jj];
486             }
487             bap += bs;
488           }
489         }
490       }
491       goto noinsert2;
492     }
493   }
494   if (nonew == 1) goto noinsert2;
495   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
496   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
497   N = nrow++ - 1; high++;
498   /* shift up all the later entries in this row */
499   ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
500   ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
501   rp[i] = col;
502   bap   = ap +  bs2*i;
503   if (roworiented) {
504     for (ii=0; ii<bs; ii++) {
505       for (jj=ii; jj<bs2; jj+=bs) {
506         bap[jj] = *value++;
507       }
508     }
509   } else {
510     for (ii=0; ii<bs; ii++) {
511       for (jj=0; jj<bs; jj++) {
512         *bap++ = *value++;
513       }
514     }
515   }
516   noinsert2:;
517   ailen[row] = nrow;
518   PetscFunctionReturn(0);
519 }
520 
521 /*
522     This routine could be optimized by removing the need for the block copy below and passing stride information
523   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
524 */
525 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
526 {
527   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
528   const MatScalar *value;
529   MatScalar       *barray     =baij->barray;
530   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
531   PetscErrorCode  ierr;
532   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
533   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
534   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
535 
536   PetscFunctionBegin;
537   if (!barray) {
538     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
539     baij->barray = barray;
540   }
541 
542   if (roworiented) {
543     stepval = (n-1)*bs;
544   } else {
545     stepval = (m-1)*bs;
546   }
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
550     if (im[i] >= rstart && im[i] < rend) {
551       row = im[i] - rstart;
552       for (j=0; j<n; j++) {
553         if (im[i] > in[j]) {
554           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
555           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
556         }
557         /* If NumCol = 1 then a copy is not required */
558         if ((roworiented) && (n == 1)) {
559           barray = (MatScalar*) v + i*bs2;
560         } else if ((!roworiented) && (m == 1)) {
561           barray = (MatScalar*) v + j*bs2;
562         } else { /* Here a copy is required */
563           if (roworiented) {
564             value = v + i*(stepval+bs)*bs + j*bs;
565           } else {
566             value = v + j*(stepval+bs)*bs + i*bs;
567           }
568           for (ii=0; ii<bs; ii++,value+=stepval) {
569             for (jj=0; jj<bs; jj++) {
570               *barray++ = *value++;
571             }
572           }
573           barray -=bs2;
574         }
575 
576         if (in[j] >= cstart && in[j] < cend) {
577           col  = in[j] - cstart;
578           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
579         } else if (in[j] < 0) continue;
580         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
581         else {
582           if (mat->was_assembled) {
583             if (!baij->colmap) {
584               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 
587 #if defined(PETSC_USE_DEBUG)
588 #if defined(PETSC_USE_CTABLE)
589             { PetscInt data;
590               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
591               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
592             }
593 #else
594             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
595 #endif
596 #endif
597 #if defined(PETSC_USE_CTABLE)
598             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
599             col  = (col - 1)/bs;
600 #else
601             col = (baij->colmap[in[j]] - 1)/bs;
602 #endif
603             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
604               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
605               col  = in[j];
606             }
607           } else col = in[j];
608           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
609         }
610       }
611     } else {
612       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
613       if (!baij->donotstash) {
614         if (roworiented) {
615           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
616         } else {
617           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
618         }
619       }
620     }
621   }
622   PetscFunctionReturn(0);
623 }
624 
/*
   Retrieve entries at global indices (idxm[], idxn[]) into v, stored
   row-major (v[i*n+j]).  Only rows owned by this process are supported.
   Columns in the diagonal range are read from A; other columns are mapped
   through colmap/garray and read from B, yielding 0.0 when the containing
   block is not present.
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column lies in the diagonal block A */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column lies in the off-diagonal block B: translate via colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block absent from B: the entry is structurally zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
665 
/*
   Compute a norm of a distributed symmetric (MPISBAIJ) matrix in which only
   the upper triangle (by block) is stored.  NORM_FROBENIUS reduces the
   squared local norms of the diagonal block A and off-diagonal block B,
   counting B twice since each of its entries also represents its mirror
   below the diagonal.  NORM_1 and NORM_INFINITY coincide by symmetry and are
   computed as the maximum global column sum.
*/
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: the sequential SBAIJ norm is already correct */
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      /* B contributes twice: each stored entry also stands for its mirror */
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
      /* Amat: accumulate each entry into its column sum, and (for strictly
         upper blocks) also into the row sum of its mirrored position */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat: every entry is off-diagonal, so it always contributes to both
         its column sum and the mirrored row sum */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* reduce the column sums globally, then take the maximum */
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
740 
741 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
742 {
743   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
744   PetscErrorCode ierr;
745   PetscInt       nstash,reallocs;
746 
747   PetscFunctionBegin;
748   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
749 
750   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
751   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
752   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
753   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
754   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
755   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
756   PetscFunctionReturn(0);
757 }
758 
/*
   MatAssemblyEnd_MPISBAIJ - completes assembly: drains the point stash and
   the block stash into the local diagonal (baij->A) and off-diagonal
   (baij->B) blocks, assembles both blocks, handles global disassembly
   agreement, and builds the multiply communication on first final assembly.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;   /* saved row-orientation flags, restored below */
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    /* drain the point (scalar) stash: receive messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* block values: each stashed block holds bs2 scalars */
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    /* restore the orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of booleans: result is true only if every rank was assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* scratch space used by MatGetRow(); freed here, reallocated on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
858 
859 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
860 #include <petscdraw.h>
/*
   Viewer backend shared by the ASCII, draw, and socket viewers.  The ASCII
   "info" formats print per-rank statistics only; every other case gathers
   the entire matrix onto rank 0 and views it there sequentially.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory statistics, printed synchronized */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    /* rank 0 owns all rows of the gathered copy; every other rank owns none */
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global row indices of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one bs-long column of the block at a time (column-major a) */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps the local off-diagonal block column to its global block column */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 views its (complete) local diagonal block of the gathered copy */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
980 
981 /* Used for both MPIBAIJ and MPISBAIJ matrices */
982 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
983 
984 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
985 {
986   PetscErrorCode ierr;
987   PetscBool      iascii,isdraw,issocket,isbinary;
988 
989   PetscFunctionBegin;
990   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
991   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
992   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
993   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
994   if (iascii || isdraw || issocket) {
995     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
996   } else if (isbinary) {
997     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
998   }
999   PetscFunctionReturn(0);
1000 }
1001 
/*
   MatDestroy_MPISBAIJ - releases everything owned by the parallel SBAIJ
   implementation: the stashes, both sequential blocks, the column map,
   all work vectors/scatters, scratch arrays, and the composed functions.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* the local diagonal (A) and off-diagonal (B) blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on the build */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* communication vectors/scatters for the two MatMult schemes */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  /* MatGetRow() scratch storage */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* clear the type name and all type-specific composed methods */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1055 
1056 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1057 {
1058   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1059   PetscErrorCode    ierr;
1060   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1061   PetscScalar       *from;
1062   const PetscScalar *x;
1063 
1064   PetscFunctionBegin;
1065   /* diagonal part */
1066   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1067   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1068 
1069   /* subdiagonal part */
1070   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1071 
1072   /* copy x into the vec slvec0 */
1073   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1074   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1075 
1076   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1077   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1078   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1079 
1080   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1082   /* supperdiagonal part */
1083   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1088 {
1089   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1090   PetscErrorCode    ierr;
1091   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1092   PetscScalar       *from;
1093   const PetscScalar *x;
1094 
1095   PetscFunctionBegin;
1096   /* diagonal part */
1097   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1098   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1099 
1100   /* subdiagonal part */
1101   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1102 
1103   /* copy x into the vec slvec0 */
1104   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1105   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1106 
1107   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1108   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1109   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1110 
1111   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113   /* supperdiagonal part */
1114   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1119 {
1120   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1121   PetscErrorCode ierr;
1122   PetscInt       nt;
1123 
1124   PetscFunctionBegin;
1125   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1126   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1127 
1128   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1129   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1130 
1131   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1132   /* do diagonal part */
1133   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1134   /* do supperdiagonal part */
1135   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1137   /* do subdiagonal part */
1138   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1139   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1145 {
1146   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1147   PetscErrorCode    ierr;
1148   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1149   PetscScalar       *from,zero=0.0;
1150   const PetscScalar *x;
1151 
1152   PetscFunctionBegin;
1153   /* diagonal part */
1154   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1155   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1156 
1157   /* subdiagonal part */
1158   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1159 
1160   /* copy x into the vec slvec0 */
1161   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1162   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1163   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1164   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1165 
1166   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1167   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1168   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1169 
1170   /* supperdiagonal part */
1171   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1176 {
1177   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1178   PetscErrorCode    ierr;
1179   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1180   PetscScalar       *from,zero=0.0;
1181   const PetscScalar *x;
1182 
1183   PetscFunctionBegin;
1184   /* diagonal part */
1185   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1186   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1187 
1188   /* subdiagonal part */
1189   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1190 
1191   /* copy x into the vec slvec0 */
1192   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1193   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1194   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1195   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1196 
1197   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1198   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1199   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1200 
1201   /* supperdiagonal part */
1202   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1207 {
1208   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1209   PetscErrorCode ierr;
1210 
1211   PetscFunctionBegin;
1212   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213   /* do diagonal part */
1214   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1215   /* do supperdiagonal part */
1216   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1217   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1218 
1219   /* do subdiagonal part */
1220   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1221   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1222   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  /* every diagonal entry lives in the local diagonal block a->A */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1240 
1241 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1242 {
1243   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1248   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
/*
   MatGetRow_MPISBAIJ - returns one locally-owned global row by merging the
   rows of the diagonal block (A) and off-diagonal block (B), sorted by
   increasing global column index.  Scratch storage (rowvalues/rowindices)
   is allocated on first use and released at assembly time.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  /* non-reentrant: only one row may be "gotten" at a time */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* request only the pieces (values/indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = count of B entries whose global block column precedes the
         diagonal block; those come first, then all of A, then the rest of B */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark must be recomputed here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1330 
1331 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1332 {
1333   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1334 
1335   PetscFunctionBegin;
1336   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1337   baij->getrowactive = PETSC_FALSE;
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1342 {
1343   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1344   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1345 
1346   PetscFunctionBegin;
1347   aA->getrow_utriangular = PETSC_TRUE;
1348   PetscFunctionReturn(0);
1349 }
1350 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1351 {
1352   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1353   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1354 
1355   PetscFunctionBegin;
1356   aA->getrow_utriangular = PETSC_FALSE;
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1361 {
1362   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1363   PetscErrorCode ierr;
1364 
1365   PetscFunctionBegin;
1366   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1367   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1372 {
1373   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1374   PetscErrorCode ierr;
1375 
1376   PetscFunctionBegin;
1377   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1378   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1383    Input: isrow       - distributed(parallel),
1384           iscol_local - locally owned (seq)
1385 */
1386 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1387 {
1388   PetscErrorCode ierr;
1389   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1390   const PetscInt *ptr1,*ptr2;
1391 
1392   PetscFunctionBegin;
1393   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1394   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1395   if (sz1 > sz2) {
1396     *flg = PETSC_FALSE;
1397     PetscFunctionReturn(0);
1398   }
1399 
1400   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1401   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1402 
1403   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1404   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1405   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1406   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1407   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1408   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1409 
1410   nmatch=0;
1411   k     = 0;
1412   for (i=0; i<sz1; i++){
1413     for (j=k; j<sz2; j++){
1414       if (a1[i] == a2[j]) {
1415         k = j; nmatch++;
1416         break;
1417       }
1418     }
1419   }
1420   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1421   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1422   ierr = PetscFree(a1);CHKERRQ(ierr);
1423   ierr = PetscFree(a2);CHKERRQ(ierr);
1424   if (nmatch < sz1) {
1425     *flg = PETSC_FALSE;
1426   } else {
1427     *flg = PETSC_TRUE;
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
/*
   MatCreateSubMatrix_MPISBAIJ - extracts a parallel submatrix.  The symmetric
   format requires iscol (gathered across ranks) to equal isrow; the gathered
   IS is cached on the new matrix so MAT_REUSE_MATRIX can recover it.
*/
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* recover the gathered column IS cached on the previous result */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* compose takes a reference, so the local destroy keeps the IS alive */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1458 
1459 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1460 {
1461   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1462   PetscErrorCode ierr;
1463 
1464   PetscFunctionBegin;
1465   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1466   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1467   PetscFunctionReturn(0);
1468 }
1469 
/*
   MatGetInfo_MPISBAIJ - sums the statistics of the two local blocks, then
   reduces them across the communicator according to flag
   (MAT_LOCAL / MAT_GLOBAL_MAX / MAT_GLOBAL_SUM).
*/
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  /* accumulate the five reducible statistics from the diagonal block ... */
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  /* ... and add the off-diagonal block's contribution */
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
1517 
/* Handles MatSetOption() for MATMPISBAIJ: most options are forwarded to the
   sequential diagonal (a->A) and/or off-diagonal (a->B) blocks; a few are
   recorded in the parallel data structure itself. */
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* pattern/allocation options apply to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg; /* also remembered here for the parallel MatSetValues path */

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* recorded locally only: affects the stash used during assembly */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops: Hermitian mult conjugates, and transpose ops no longer coincide with mult */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric = PETSC_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops (transpose == mult for a symmetric matrix) */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    /* SBAIJ is symmetric by construction; turning symmetry off is unsupported */
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): this writes the same field as MAT_IGNORE_LOWER_TRIANGULAR,
       so both options currently have identical effect — confirm this is intended */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1601 
1602 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1603 {
1604   PetscErrorCode ierr;
1605 
1606   PetscFunctionBegin;
1607   if (reuse == MAT_INITIAL_MATRIX) {
1608     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1609   }  else if (reuse == MAT_REUSE_MATRIX) {
1610     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1611   }
1612   PetscFunctionReturn(0);
1613 }
1614 
/* Symmetric diagonal scaling D*A*D: for SBAIJ the left and right scaling
   vectors must be identical so that symmetry is preserved.  The off-process
   part of the scaling vector is gathered with the matrix's scatter context,
   overlapped with the scaling of the local blocks. */
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  /* different Vec objects are acceptable only if their entries are equal */
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start gathering off-process entries of rr into baij->lvec ... */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1649 
1650 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1651 {
1652   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1653   PetscErrorCode ierr;
1654 
1655   PetscFunctionBegin;
1656   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1661 
1662 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1663 {
1664   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1665   Mat            a,b,c,d;
1666   PetscBool      flg;
1667   PetscErrorCode ierr;
1668 
1669   PetscFunctionBegin;
1670   a = matA->A; b = matA->B;
1671   c = matB->A; d = matB->B;
1672 
1673   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1674   if (flg) {
1675     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1676   }
1677   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1682 {
1683   PetscErrorCode ierr;
1684   PetscBool      isbaij;
1685 
1686   PetscFunctionBegin;
1687   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1688   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1689   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1690   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1691     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1692     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1693     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1694   } else {
1695     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1696     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1697 
1698     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1699     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1700   }
1701   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1706 {
1707   PetscErrorCode ierr;
1708 
1709   PetscFunctionBegin;
1710   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1711   PetscFunctionReturn(0);
1712 }
1713 
/* Computes Y = a*X + Y for MPISBAIJ matrices.
   SAME_NONZERO_PATTERN:   axpy directly on the raw value arrays of the local blocks.
   SUBSET_NONZERO_PATTERN: generic row-based MatAXPY_Basic() with upper-triangular row access enabled on X.
   otherwise:              build a freshly preallocated matrix holding the union pattern and replace Y in place. */
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical sparsity: Y += a*X reduces to BLAS axpy on the value arrays of A and B blocks */
    PetscScalar alpha = a;
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    /* NOTE(review): these are sized by scalar rows (rmap->N of the sequential blocks)
       although the preallocation routines index by block row — over-allocation, safe but
       larger than strictly needed; confirm before tightening */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    /* count the union pattern of X and Y, split into diagonal/off-diagonal parts */
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* Y takes over B's guts; the old contents of Y are destroyed */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    /* NOTE(review): Y was header-replaced above, so this restore acts on the new
       matrix, not the one MatGetRowUpperTriangular() was called on — verify intent */
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1763 
1764 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1765 {
1766   PetscErrorCode ierr;
1767   PetscInt       i;
1768   PetscBool      flg;
1769 
1770   PetscFunctionBegin;
1771   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1772   for (i=0; i<n; i++) {
1773     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1774     if (!flg) {
1775       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1776     }
1777   }
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1782 {
1783   PetscErrorCode ierr;
1784   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1785   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1786 
1787   PetscFunctionBegin;
1788   if (!Y->preallocated) {
1789     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1790   } else if (!aij->nz) {
1791     PetscInt nonew = aij->nonew;
1792     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1793     aij->nonew = nonew;
1794   }
1795   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1800 {
1801   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1802   PetscErrorCode ierr;
1803 
1804   PetscFunctionBegin;
1805   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1806   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1807   if (d) {
1808     PetscInt rstart;
1809     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1810     *d += rstart/A->rmap->bs;
1811 
1812   }
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1817 {
1818   PetscFunctionBegin;
1819   *a = ((Mat_MPISBAIJ*)A->data)->A;
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 /* -------------------------------------------------------------------*/
/* Function table for MATMPISBAIJ.  The numeric comments give the slot index in
   struct _MatOps; a 0 entry means the operation is not implemented for this
   type.  Slots 5/6 (multtranspose/multtransposeadd) reuse the plain mult
   routines because the matrix is symmetric; MatSetOption_MPISBAIJ swaps in
   Hermitian variants when MAT_HERMITIAN is set with complex scalars. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       0,
                                       0,
                               /* 49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0,
                                       MatConvert_MPISBAIJ_Basic,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_Default,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};
1970 
1971 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1972 {
1973   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1974   PetscErrorCode ierr;
1975   PetscInt       i,mbs,Mbs;
1976   PetscMPIInt    size;
1977 
1978   PetscFunctionBegin;
1979   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1980   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1981   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1982   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1983   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
1984   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
1985 
1986   mbs = B->rmap->n/bs;
1987   Mbs = B->rmap->N/bs;
1988   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1989 
1990   B->rmap->bs = bs;
1991   b->bs2      = bs*bs;
1992   b->mbs      = mbs;
1993   b->Mbs      = Mbs;
1994   b->nbs      = B->cmap->n/bs;
1995   b->Nbs      = B->cmap->N/bs;
1996 
1997   for (i=0; i<=b->size; i++) {
1998     b->rangebs[i] = B->rmap->range[i]/bs;
1999   }
2000   b->rstartbs = B->rmap->rstart/bs;
2001   b->rendbs   = B->rmap->rend/bs;
2002 
2003   b->cstartbs = B->cmap->rstart/bs;
2004   b->cendbs   = B->cmap->rend/bs;
2005 
2006 #if defined(PETSC_USE_CTABLE)
2007   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2008 #else
2009   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2010 #endif
2011   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2012   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2013   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2014   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2015   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2016   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2017   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2018   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2019   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2020 
2021   /* Because the B will have been resized we simply destroy it and create a new one each time */
2022   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2023   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2024   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2025   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2026   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2027   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2028 
2029   if (!B->preallocated) {
2030     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2031     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2032     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2033     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2034     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2035   }
2036 
2037   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2038   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2039 
2040   B->preallocated  = PETSC_TRUE;
2041   B->was_assembled = PETSC_FALSE;
2042   B->assembled     = PETSC_FALSE;
2043   PetscFunctionReturn(0);
2044 }
2045 
/* Preallocates and fills an MPISBAIJ matrix from block-CSR data (ii,jj,V) in
   global block indices.  First pass counts, per local block row, the entries
   in the diagonal block (d_nnz) and off-diagonal block (o_nnz), skipping
   strictly lower-triangular entries since SBAIJ stores only the upper
   triangle; second pass inserts the values with MatSetValuesBlocked. */
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cend;
  PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* local sizes in block units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ     = jj + ii[i];
    bd     = 0;                 /* bd = number of below-diagonal entries (ignored by SBAIJ) */
    for (j=0; j<nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d  = 0;                     /* d = entries falling in this process's diagonal block */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;     /* remainder goes to the off-diagonal block */
    nz       = nz - bd;
    nz_max = PetscMax(nz_max,nz);
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* with no user values, insert explicit zeros from a scratch buffer sized for the widest row */
  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* data is already distributed correctly, so assembly can skip off-process stashing */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2124 
2125 /*MC
2126    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2127    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2128    the matrix is stored.
2129 
2130    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2131    can call MatSetOption(Mat, MAT_HERMITIAN);
2132 
2133    Options Database Keys:
2134 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2135 
2136    Notes:
2137      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2138      diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
2139 
2140    Level: beginner
2141 
2142 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2143 M*/
2144 
/* Type constructor for MATMPISBAIJ: installs the function table, initializes
   the parallel data structure to an empty state, composes the type-specific
   methods, and processes the hash-table option.  Preallocation happens later
   via MatMPISBAIJSetPreallocation()/MatSetUp(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  /* type-specific methods reachable via PetscObjectQueryFunction/MatConvert */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);

  /* SBAIJ is symmetric by construction; with complex scalars it is symmetric
     (not Hermitian) unless MAT_HERMITIAN is set later */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  B->symmetric_eternal          = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian                  = PETSC_FALSE;
  B->hermitian_set              = PETSC_FALSE;
#else
  B->hermitian                  = PETSC_TRUE;
  B->hermitian_set              = PETSC_TRUE;
#endif

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39; /* fall back to the default growth factor on nonsensical input */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2246 
2247 /*MC
2248    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2249 
2250    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2251    and MATMPISBAIJ otherwise.
2252 
2253    Options Database Keys:
2254 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2255 
2256   Level: beginner
2257 
2258 .seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ
2259 M*/
2260 
2261 /*@C
2262    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2263    the user should preallocate the matrix storage by setting the parameters
2264    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2265    performance can be increased by more than a factor of 50.
2266 
2267    Collective on Mat
2268 
2269    Input Parameters:
2270 +  B - the matrix
2271 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2272           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2273 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2274            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
2279 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2280            submatrix (same for all local rows).
2281 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2282            off-diagonal portion of the local submatrix that is right of the diagonal
2283            (possibly different for each block row) or NULL.
2284 
2285 
2286    Options Database Keys:
2287 +   -mat_no_unroll - uses code that does not unroll the loops in the
2288                      block calculations (much slower)
2289 -   -mat_block_size - size of the blocks to use
2290 
2291    Notes:
2292 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2295 
2296    If the *_nnz parameter is given then the *_nz parameter is ignored
2297 
2298    Storage Information:
2299    For a square global matrix we define each processor's diagonal portion
2300    to be its local rows and the corresponding columns (a square submatrix);
2301    each processor's off-diagonal portion encompasses the remainder of the
2302    local matrix (a rectangular submatrix).
2303 
2304    The user can specify preallocated storage for the diagonal part of
2305    the local submatrix with either d_nz or d_nnz (not both).  Set
2306    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2307    memory allocation.  Likewise, specify preallocated storage for the
2308    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2309 
2310    You can call MatGetInfo() to get information on how effective the preallocation was;
2311    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2312    You can also run with the option -info and look for messages with the string
2313    malloc in them to see if additional memory allocation was needed.
2314 
2315    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2316    the figure below we depict these three local rows and all columns (0-11).
2317 
2318 .vb
2319            0 1 2 3 4 5 6 7 8 9 10 11
2320           --------------------------
2321    row 3  |. . . d d d o o o o  o  o
2322    row 4  |. . . d d d o o o o  o  o
2323    row 5  |. . . d d d o o o o  o  o
2324           --------------------------
2325 .ve
2326 
2327    Thus, any entries in the d locations are stored in the d (diagonal)
2328    submatrix, and any entries in the o locations are stored in the
2329    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2330    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2331 
2332    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2333    plus the diagonal part of the d matrix,
2334    and o_nz should indicate the number of block nonzeros per row in the o matrix
2335 
2336    In general, for PDE problems in which most nonzeros are near the diagonal,
2337    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2338    or you will get TERRIBLE performance; see the users' manual chapter on
2339    matrices.
2340 
2341    Level: intermediate
2342 
2343 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2344 @*/
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate collectively: bs must be the same on all ranks sharing B */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation (MatMPISBAIJSetPreallocation_MPISBAIJ
     when B is MPISBAIJ); PetscTryMethod is a no-op for other matrix types, so this
     call is safe regardless of B's concrete type */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2356 
2357 /*@C
2358    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2359    (block compressed row).  For good matrix assembly performance
2360    the user should preallocate the matrix storage by setting the parameters
2361    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2362    performance can be increased by more than a factor of 50.
2363 
2364    Collective
2365 
2366    Input Parameters:
2367 +  comm - MPI communicator
2368 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2369           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2370 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2371            This value should be the same as the local size used in creating the
2372            y vector for the matrix-vector product y = Ax.
2373 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2374            This value should be the same as the local size used in creating the
2375            x vector for the matrix-vector product y = Ax.
2376 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2377 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2378 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2379            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
2385 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2386            submatrix (same for all local rows).
2387 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2388            off-diagonal portion of the local submatrix (possibly different for
2389            each block row) or NULL.
2390 
2391    Output Parameter:
2392 .  A - the matrix
2393 
2394    Options Database Keys:
2395 +   -mat_no_unroll - uses code that does not unroll the loops in the
2396                      block calculations (much slower)
2397 .   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)
2400 
2401    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2402    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2403    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2404 
2405    Notes:
2406    The number of rows and columns must be divisible by blocksize.
2407    This matrix type does not support complex Hermitian operation.
2408 
2409    The user MUST specify either the local or global matrix dimensions
2410    (possibly both).
2411 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2414 
2415    If the *_nnz parameter is given then the *_nz parameter is ignored
2416 
2417    Storage Information:
2418    For a square global matrix we define each processor's diagonal portion
2419    to be its local rows and the corresponding columns (a square submatrix);
2420    each processor's off-diagonal portion encompasses the remainder of the
2421    local matrix (a rectangular submatrix).
2422 
2423    The user can specify preallocated storage for the diagonal part of
2424    the local submatrix with either d_nz or d_nnz (not both).  Set
2425    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2426    memory allocation.  Likewise, specify preallocated storage for the
2427    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2428 
2429    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2430    the figure below we depict these three local rows and all columns (0-11).
2431 
2432 .vb
2433            0 1 2 3 4 5 6 7 8 9 10 11
2434           --------------------------
2435    row 3  |. . . d d d o o o o  o  o
2436    row 4  |. . . d d d o o o o  o  o
2437    row 5  |. . . d d d o o o o  o  o
2438           --------------------------
2439 .ve
2440 
2441    Thus, any entries in the d locations are stored in the d (diagonal)
2442    submatrix, and any entries in the o locations are stored in the
2443    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2444    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2445 
2446    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2447    plus the diagonal part of the d matrix,
2448    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2449    In general, for PDE problems in which most nonzeros are near the diagonal,
2450    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2451    or you will get TERRIBLE performance; see the users' manual chapter on
2452    matrices.
2453 
2454    Level: intermediate
2455 
2456 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2457 @*/
2458 
2459 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2460 {
2461   PetscErrorCode ierr;
2462   PetscMPIInt    size;
2463 
2464   PetscFunctionBegin;
2465   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2466   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2467   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2468   if (size > 1) {
2469     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2470     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2471   } else {
2472     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2473     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2474   }
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 
/*
   MatDuplicate_MPISBAIJ - Duplicates an MPISBAIJ matrix: creates a new matrix of
   the same type and layout, copies all scalar bookkeeping fields from the old
   Mat_MPISBAIJ data structure, deep-copies the colmap/garray index arrays, and
   duplicates the communication vectors/scatters.  Values of A and B are copied
   or not according to cpvalues (passed through to MatDuplicate on the blocks).
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  /* create an empty matrix of the same type; the layouts are shared by reference */
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* the duplicate is immediately usable: mark it preallocated and assembled */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size bookkeeping (bs2 = bs*bs, per-rank and global block counts) */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  /* copy communicator/ownership info; per-call scratch (rowindices, rowvalues,
     barray) starts out empty in the duplicate */
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  /* only the configuration (flag, factor) carries over; the table itself and its
     statistics are rebuilt lazily when needed */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* deep-copy the block-row ownership ranges and the global-to-local column map */
  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* deep-copy garray (compressed column -> global block column of B), sized by
     the number of block columns actually present in the off-diagonal part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  /* fresh stash plus duplicated communication vectors and scatter context */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views into the storage of slvec1/slvec0:
     the first bs*mbs entries are the "a" (diagonal) part, the remainder the "b"
     (off-process) part.  The arrays must be split here exactly as at creation. */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  /* NOTE(review): slvec0 and slvec1 are logged as parents twice (also above) -
     harmless but redundant; confirm before removing */
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  /* share the symmetric scatter context by reference instead of copying it */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* duplicate the sequential diagonal (A) and off-diagonal (B) blocks; cpvalues
     decides whether the numerical values are copied */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2585 
2586 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2587 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2588 
2589 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2590 {
2591   PetscErrorCode ierr;
2592   PetscBool      isbinary;
2593 
2594   PetscFunctionBegin;
2595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2596   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2597   ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 /*XXXXX@
2602    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2603 
2604    Input Parameters:
2605 .  mat  - the matrix
2606 .  fact - factor
2607 
2608    Not Collective on Mat, each process can have a different hash factor
2609 
2610    Level: advanced
2611 
2612   Notes:
2613    This can also be set by the command line option: -mat_use_hash_table fact
2614 
2615 .seealso: MatSetOption()
2616 @XXXXX*/
2617 
2618 
/*
   MatGetRowMaxAbs_MPISBAIJ - Computes, for each local row, the maximum absolute
   value over the whole (symmetric) row.  The diagonal block A contributes via
   MatGetRowMaxAbs on A; each stored entry of the off-diagonal block B then
   contributes both to its local row and (by symmetry) to the row owned by the
   rank that owns its column, which is communicated with point-to-point MPI.

   idx (column indices of the maxima) is not supported.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* start with the per-row maxima of the diagonal block A */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* raw CSR-of-blocks storage of the off-diagonal block B */
  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  /* work[global col] accumulates the symmetric (transposed) contributions of B */
  ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);

  /* row_max for B */
  /* the last rank owns no columns to its right, so its B part is empty */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;                /* local col index */
          /* NOTE(review): offsetting by rowners_bs[rank+1] assumes B's column
             indices count from the start of the next rank's range - confirm
             against how bj is built for this format */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            /* contribution to the local row ... */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            /* ... and, by symmetry, to the row equal to the global column */
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    /* NOTE(review): blocking MPI_Send before any MPI_Recv can deadlock for
       messages exceeding the MPI buffering threshold - consider nonblocking */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  /* every rank except 0 receives one message from each lower rank and folds it
     into va with a max; arrival order does not matter since max is commutative */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2700 
/*
   MatSOR_MPISBAIJ - SOR/SSOR relaxation for MPISBAIJ.  Only block size 1 is
   supported.  Supported sweep types: SOR_APPLY_UPPER, local symmetric sweeps,
   single local forward/backward sweeps with zero initial guess, and Eisenstat.
   Communication of the off-process coupling uses the slvec0/slvec1 work
   vectors and the symmetric scatter sMvctx: slvec0 = [xx part; B^T xx part],
   slvec1 = [bb part; 0], and a forward ADD scatter combines them.
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* apply only the local upper-triangular sweep; no communication needed */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the first iteration needs no off-process coupling */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      /* negate the whole of slvec0 = [xx; B^T xx] in one pass */
      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* overlap the scatter with restoring the array accesses */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    /* single local forward sweep from a zero guess: purely local */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    /* single local backward sweep from a zero guess: purely local */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    /* Eisenstat trick: combine diagonal scaling with forward/backward sweeps */
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily create the persistent work vectors for the Eisenstat variant */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* NOTE(review): the diagonal-block product is written to bb1 while the
         following AYPX updates slvec1a - confirm this pairing is intentional */
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* omega == 1 makes scale == -1, so the fused form is b - diag*x */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2832 
/*
   MatSOR_MPISBAIJ_2comm - Variant of MatSOR_MPISBAIJ that uses the plain
   scatter Mvctx twice per iteration (one forward scatter for the upper part,
   one reverse ADD scatter for the lower/transposed part) instead of the
   combined symmetric scatter.  Only the local symmetric sweep and block size 1
   are supported.
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the first iteration needs no off-process coupling */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* start gathering the ghost values of xx; local work overlaps the scatter */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse ADD scatter sends -B^T*xx contributions back to their owners */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2876 
2877 /*@
2878      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2879          CSR format the local rows.
2880 
2881    Collective
2882 
2883    Input Parameters:
2884 +  comm - MPI communicator
2885 .  bs - the block size, only a block size of 1 is supported
2886 .  m - number of local rows (Cannot be PETSC_DECIDE)
2887 .  n - This value should be the same as the local size used in creating the
2888        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2889        calculated if N is given) For square matrices n is almost always m.
2890 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2891 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
2893 .   j - column indices
2894 -   a - matrix values
2895 
2896    Output Parameter:
2897 .   mat - the matrix
2898 
2899    Level: intermediate
2900 
2901    Notes:
2902        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2903      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2904      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2905 
       The i and j indices are 0 based, and the i indices are offsets into the local j array.
2907 
2908 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2909           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2910 @*/
2911 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2912 {
2913   PetscErrorCode ierr;
2914 
2915 
2916   PetscFunctionBegin;
2917   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2918   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2919   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2920   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2921   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2922   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2923   PetscFunctionReturn(0);
2924 }
2925 
2926 
2927 /*@C
2928    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2929 
2930    Collective
2931 
2932    Input Parameters:
2933 +  B - the matrix
2934 .  bs - the block size
2935 .  i - the indices into j for the start of each local row (starts with zero)
2936 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2937 -  v - optional values in the matrix
2938 
2939    Level: advanced
2940 
2941    Notes:
2942    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2943    and usually the numerical values as well
2944 
2945    Any entries below the diagonal are ignored
2946 
2947 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2948 @*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Forward to the implementation registered for B's actual type under the
     "MatMPISBAIJSetPreallocationCSR_C" composed-function name;
     PetscTryMethod() is a no-op when B's type does not provide the method */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2957 
2958 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2959 {
2960   PetscErrorCode ierr;
2961   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2962   PetscInt       *indx;
2963   PetscScalar    *values;
2964 
2965   PetscFunctionBegin;
2966   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2967   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2968     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2969     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2970     PetscInt       *bindx,rmax=a->rmax,j;
2971     PetscMPIInt    rank,size;
2972 
2973     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2974     mbs = m/bs; Nbs = N/cbs;
2975     if (n == PETSC_DECIDE) {
2976       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
2977     }
2978     nbs = n/cbs;
2979 
2980     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2981     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
2982 
2983     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2984     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
2985     if (rank == size-1) {
2986       /* Check sum(nbs) = Nbs */
2987       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
2988     }
2989 
2990     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2991     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2992     for (i=0; i<mbs; i++) {
2993       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2994       nnz  = nnz/bs;
2995       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2996       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2997       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2998     }
2999     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3000     ierr = PetscFree(bindx);CHKERRQ(ierr);
3001 
3002     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3003     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3004     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3005     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
3006     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3007     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3008     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3009   }
3010 
3011   /* numeric phase */
3012   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3013   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3014 
3015   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3016   for (i=0; i<m; i++) {
3017     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3018     Ii   = i + rstart;
3019     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3020     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3021   }
3022   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3023   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3024   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3025   PetscFunctionReturn(0);
3026 }
3027