xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision f5ff9c666c0d37e8a7ec3aa2f8e2aa9e44449bdb)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
/* This could be moved to matimpl.h */
/*
  Preallocate (and optionally fill) B using the union of the nonzero patterns
  of the nm matrices X[].

  Input:
    B    - matrix to preallocate; must share its row layout with every X[i]
    nm   - number of pattern matrices
    X    - array of nm matrices supplying nonzero locations
    symm - optional array; when symm[i] is PETSC_TRUE the transposed locations
           of X[i] are inserted as well (used when X[i] stores only an upper
           triangle, as SBAIJ does)
    fill - passed to MatPreallocatorPreallocate(); when PETSC_TRUE the values
           are inserted into B as well

  Uses a MATPREALLOCATOR matrix to record the pattern, then transfers it to B.
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat            preallocator;
  PetscInt       r,rstart,rend;
  PetscInt       bs,i,m,n,M,N;
  PetscBool      cong = PETSC_TRUE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(B,nm,2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
    /* every X[i] must have the same row distribution as B */
    ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B,fill,5);
  ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
  /* create a preallocator matrix with the same shape/blocking as B */
  ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
  ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
  ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
  ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetUp(preallocator);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
  for (r = rstart; r < rend; ++r) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      /* record the (r,col) pattern of X[i] ... */
      ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
      if (symm && symm[i]) {
        /* ... and, for symmetric input, the transposed (col,r) pattern too */
        ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* hand the collected pattern (and values, if fill) over to B */
  ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
  ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
61 
/*
  Generic conversion from MPISBAIJ to any matrix type supporting
  MatSetValues().  The symmetric storage of A (only the upper triangle is
  kept) is expanded: every stored entry (r,c) is also inserted at (c,r),
  conjugated when A is Hermitian and complex scalars are in use.

  reuse semantics:
    MAT_INITIAL_MATRIX / MAT_INPLACE_MATRIX - a new matrix B is created and
      preallocated (dense targets are simply MatSetUp'd);
    MAT_REUSE_MATRIX - *newmat is zeroed and refilled.
  For MAT_INPLACE_MATRIX, A's header is replaced by B at the end.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,newtype);CHKERRQ(ierr);
    ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
    /* dense targets need no preallocation, only MatSetUp() */
    ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
    if (!isdense) {
      ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
      /* symm = PETSC_TRUE: preallocate the transposed pattern as well */
      ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
    } else {
      ierr = MatSetUp(B);CHKERRQ(ierr);
    }
  } else {
    B    = *newmat;
    ierr = MatZeroEntries(B);CHKERRQ(ierr);
  }

  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
    /* insert the stored upper-triangular entries ... */
    ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian) {
      /* ... and their conjugates below the diagonal for Hermitian matrices */
      PetscInt i;
      for (i = 0; i < ncols; i++) {
        ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
      }
    } else {
      ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
    }
#else
    /* mirror the row into the corresponding column (symmetric expansion) */
    ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
#endif
    ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
126 
127 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
128 {
129   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
130   PetscErrorCode ierr;
131 
132   PetscFunctionBegin;
133   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
134   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
139 {
140   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
141   PetscErrorCode ierr;
142 
143   PetscFunctionBegin;
144   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
145   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
/*
  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol):
  insert or add the scalar 'value' at local point position (row,col) of the
  diagonal (SeqSBAIJ) part A.  Relies on the working variables declared in
  MatSetValues_MPISBAIJ (rp, ap, bap, aimax, ailen, nrow, rmax, low, high, t,
  _i, N, brow, bcol, ridx, cidx, bs, bs2, a, A, ierr) being in scope.
  Performs a short binary search followed by a linear scan of the block row;
  if the target block is absent and insertion is allowed (a->nonew permitting)
  the row is grown with MatSeqXAIJReallocateAIJ and later entries are shifted.
  orow/ocol are the original global indices, used only in error messages.
*/
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }
185 
/*
  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol):
  insert or add the scalar 'value' at local point position (row,col) of the
  off-diagonal (SeqBAIJ) part B.  Mirrors MatSetValues_SeqSBAIJ_A_Private but
  uses B's arrays (bimax, bilen, bi, bj, ba) and the b_noinsert label; the
  caller (MatSetValues_MPISBAIJ) must keep these variables in sync with
  baij->B, which can be replaced during matrix disassembly.
  orow/ocol are the original global indices, used only in error messages.
*/
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
222 
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored or generates an error
*/
/*
  Point-wise MatSetValues for MPISBAIJ.  Locally-owned rows are routed to the
  diagonal part A (upper-triangular blocks only) or the off-diagonal part B
  via the MatSetValues_SeqSBAIJ_{A,B}_Private macros; entries for rows owned
  by other processes are stashed for communication during assembly.
  Lower-triangular blocks are skipped when a->ignore_ltriangular is set,
  otherwise they raise an error.
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the scratch buffers used to filter a row before stashing */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* map the global column to B's compressed local numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* column not present in B: disassemble and fall back to global
                 indexing; B's arrays were reset, so re-fetch them */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        /* filter out lower-triangular blocks before stashing the row */
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
339 
340 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
341 {
342   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
343   PetscErrorCode    ierr;
344   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
345   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
346   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
347   PetscBool         roworiented=a->roworiented;
348   const PetscScalar *value     = v;
349   MatScalar         *ap,*aa = a->a,*bap;
350 
351   PetscFunctionBegin;
352   if (col < row) {
353     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
354     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
355   }
356   rp   = aj + ai[row];
357   ap   = aa + bs2*ai[row];
358   rmax = imax[row];
359   nrow = ailen[row];
360   value = v;
361   low   = 0;
362   high  = nrow;
363 
364   while (high-low > 7) {
365     t = (low+high)/2;
366     if (rp[t] > col) high = t;
367     else             low  = t;
368   }
369   for (i=low; i<high; i++) {
370     if (rp[i] > col) break;
371     if (rp[i] == col) {
372       bap = ap +  bs2*i;
373       if (roworiented) {
374         if (is == ADD_VALUES) {
375           for (ii=0; ii<bs; ii++) {
376             for (jj=ii; jj<bs2; jj+=bs) {
377               bap[jj] += *value++;
378             }
379           }
380         } else {
381           for (ii=0; ii<bs; ii++) {
382             for (jj=ii; jj<bs2; jj+=bs) {
383               bap[jj] = *value++;
384             }
385           }
386         }
387       } else {
388         if (is == ADD_VALUES) {
389           for (ii=0; ii<bs; ii++) {
390             for (jj=0; jj<bs; jj++) {
391               *bap++ += *value++;
392             }
393           }
394         } else {
395           for (ii=0; ii<bs; ii++) {
396             for (jj=0; jj<bs; jj++) {
397               *bap++  = *value++;
398             }
399           }
400         }
401       }
402       goto noinsert2;
403     }
404   }
405   if (nonew == 1) goto noinsert2;
406   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
407   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
408   N = nrow++ - 1; high++;
409   /* shift up all the later entries in this row */
410   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
411   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
412   rp[i] = col;
413   bap   = ap +  bs2*i;
414   if (roworiented) {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=ii; jj<bs2; jj+=bs) {
417         bap[jj] = *value++;
418       }
419     }
420   } else {
421     for (ii=0; ii<bs; ii++) {
422       for (jj=0; jj<bs; jj++) {
423         *bap++ = *value++;
424       }
425     }
426   }
427   noinsert2:;
428   ailen[row] = nrow;
429   PetscFunctionReturn(0);
430 }
431 
432 /*
433    This routine is exactly duplicated in mpibaij.c
434 */
435 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
436 {
437   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
438   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
439   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
440   PetscErrorCode    ierr;
441   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
442   PetscBool         roworiented=a->roworiented;
443   const PetscScalar *value     = v;
444   MatScalar         *ap,*aa = a->a,*bap;
445 
446   PetscFunctionBegin;
447   rp   = aj + ai[row];
448   ap   = aa + bs2*ai[row];
449   rmax = imax[row];
450   nrow = ailen[row];
451   low  = 0;
452   high = nrow;
453   value = v;
454   while (high-low > 7) {
455     t = (low+high)/2;
456     if (rp[t] > col) high = t;
457     else             low  = t;
458   }
459   for (i=low; i<high; i++) {
460     if (rp[i] > col) break;
461     if (rp[i] == col) {
462       bap = ap +  bs2*i;
463       if (roworiented) {
464         if (is == ADD_VALUES) {
465           for (ii=0; ii<bs; ii++) {
466             for (jj=ii; jj<bs2; jj+=bs) {
467               bap[jj] += *value++;
468             }
469           }
470         } else {
471           for (ii=0; ii<bs; ii++) {
472             for (jj=ii; jj<bs2; jj+=bs) {
473               bap[jj] = *value++;
474             }
475           }
476         }
477       } else {
478         if (is == ADD_VALUES) {
479           for (ii=0; ii<bs; ii++,value+=bs) {
480             for (jj=0; jj<bs; jj++) {
481               bap[jj] += value[jj];
482             }
483             bap += bs;
484           }
485         } else {
486           for (ii=0; ii<bs; ii++,value+=bs) {
487             for (jj=0; jj<bs; jj++) {
488               bap[jj]  = value[jj];
489             }
490             bap += bs;
491           }
492         }
493       }
494       goto noinsert2;
495     }
496   }
497   if (nonew == 1) goto noinsert2;
498   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
499   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
500   N = nrow++ - 1; high++;
501   /* shift up all the later entries in this row */
502   ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
503   ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
504   rp[i] = col;
505   bap   = ap +  bs2*i;
506   if (roworiented) {
507     for (ii=0; ii<bs; ii++) {
508       for (jj=ii; jj<bs2; jj+=bs) {
509         bap[jj] = *value++;
510       }
511     }
512   } else {
513     for (ii=0; ii<bs; ii++) {
514       for (jj=0; jj<bs; jj++) {
515         *bap++ = *value++;
516       }
517     }
518   }
519   noinsert2:;
520   ailen[row] = nrow;
521   PetscFunctionReturn(0);
522 }
523 
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
/*
  Block-wise MatSetValues for MPISBAIJ.  im/in are global block indices;
  v holds m*n blocks of bs*bs scalars.  Locally-owned block rows are routed
  to the diagonal part A (via MatSetValuesBlocked_SeqSBAIJ_Inlined, which
  enforces upper-triangular storage) or the off-diagonal part B; rows owned
  elsewhere are stashed in the block stash for assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs*bs scratch block used for reordering copies */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the strided block into contiguous scratch storage */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal part (A) */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
        else { /* off-diagonal part (B) */
          if (mat->was_assembled) {
            /* map the global block column to B's compressed numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not present in B: disassemble, use global indexing */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else { /* off-process block row: stash it */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
627 
/*
  Point-wise MatGetValues for MPISBAIJ.  Only locally-owned rows may be
  queried; negative row/column indices are skipped.  Columns in the diagonal
  range are read from A, others from B via the colmap (a column absent from
  B yields 0.0).
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart; /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* diagonal part */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal part: translate global column through the colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block not stored in B: the entry is an (unstored) zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
668 
/*
  Computes a norm of an MPISBAIJ matrix.  Supports NORM_FROBENIUS and
  NORM_1/NORM_INFINITY; the latter two are computed identically here
  (the code accumulates both row and column sums for every stored entry,
  exploiting the symmetric storage).  For Frobenius, the squared norms of the
  A and B parts are reduced over all ranks; B's contribution counts twice
  since only one triangle of each off-diagonal pair is stored.
*/
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: delegate to the sequential part */
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      /* B entries appear twice in the full symmetric matrix */
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          /* garray maps B's compressed block columns back to global */
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* combine partial column sums across ranks, then take the maximum */
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
743 
744 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
745 {
746   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
747   PetscErrorCode ierr;
748   PetscInt       nstash,reallocs;
749 
750   PetscFunctionBegin;
751   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
752 
753   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
754   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
755   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
756   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
757   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
758   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
/*
   Completes assembly: receives the stashed off-process entries started in
   MatAssemblyBegin_MPISBAIJ(), inserts them, assembles the local diagonal (A)
   and off-diagonal (B) blocks, handles disassembly/reassembly, and updates the
   global nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    /* drain the scalar (point-entry) stash, one message at a time */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
    /* drain the block stash; values arrive in bs2-sized blocks */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    /* restore the original row/column orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* scratch space allocated by MatGetRow_MPISBAIJ() is released here */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
861 
862 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
863 #include <petscdraw.h>
/*
   Views the matrix through an ASCII, draw, or socket viewer. The INFO/INFO_DETAIL
   ASCII formats print statistics only; for all other cases the entire matrix is
   gathered onto rank 0 and viewed there.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* print per-process nonzero counts and memory usage, then return */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* a null draw context means nothing should be drawn */
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global row indices of one block row */
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        /* insert one bs x bs block, column by column (a advances through the block's values) */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 now owns the whole matrix in its local diagonal block; view that */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
983 
984 /* Used for both MPIBAIJ and MPISBAIJ matrices */
985 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
986 
987 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
988 {
989   PetscErrorCode ierr;
990   PetscBool      iascii,isdraw,issocket,isbinary;
991 
992   PetscFunctionBegin;
993   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
994   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
995   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
996   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
997   if (iascii || isdraw || issocket) {
998     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
999   } else if (isbinary) {
1000     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1001   }
1002   PetscFunctionReturn(0);
1003 }
1004 
/*
   Destroys an MPISBAIJ matrix: frees the stashes, the local A/B blocks,
   the column map, all work vectors and scatters, and removes the composed
   type-conversion functions.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* colmap is either a hash table or a plain array depending on the build */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* work vectors and scatters used by the matrix-vector products */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  /* scratch space allocated lazily by MatGetRow_MPISBAIJ() */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* detach the composed methods registered at creation/conversion time */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1061 
/*
   yy = A*xx for a Hermitian MPISBAIJ matrix. The local diagonal block is
   applied directly; off-diagonal contributions use B^H (Hermitian transpose)
   combined through the sMvctx scatter with ADD_VALUES.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  if (!a->B->ops->multhermitiantranspose) SETERRQ1(PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s\n",((PetscObject)a->B)->type_name);
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  /* accumulate the off-process contributions into slvec1 */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part: yy = B*slvec1b + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1093 
/*
   yy = A*xx for a symmetric MPISBAIJ matrix. Same structure as the Hermitian
   variant but uses B^T (plain transpose) for the off-diagonal contributions.
*/
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  /* accumulate the off-process contributions into slvec1 */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part: yy = B*slvec1b + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1124 
/*
   zz = yy + A*xx for a Hermitian MPISBAIJ matrix. The diagonal block's
   multadd folds yy in; off-diagonal contributions use B^H and the sMvctx
   scatter as in MatMult_MPISBAIJ_Hermitian().
*/
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part: slvec1a = A_diag*xx + yy */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  if (!a->B->ops->multhermitiantranspose) SETERRQ1(PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s\n",((PetscObject)a->B)->type_name);
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  /* xx's read array is released between scatter begin and end */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part: zz = B*slvec1b + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1156 
/*
   zz = yy + A*xx for a symmetric MPISBAIJ matrix. Same structure as the
   Hermitian variant but uses B^T for the off-diagonal contributions.
*/
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part: slvec1a = A_diag*xx + yy */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  /* xx's read array is released between scatter begin and end */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part: zz = B*slvec1b + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1187 
1188 /*
1189   This only works correctly for square matrices where the subblock A->A is the
1190    diagonal block
1191 */
1192 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1193 {
1194   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1199   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1204 {
1205   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1210   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
/*
   Returns one locally-owned row of the matrix, merging the pieces stored in
   the diagonal (A) and off-diagonal (B) blocks into a single list sorted by
   increasing global column index. Scratch buffers are allocated lazily on the
   first call and freed at assembly/destroy time.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time; MatRestoreRow() clears the flag */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* only request the pieces (values/columns) the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* layout: B entries left of A | all A entries | B entries right of A */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  /* hand the sub-block rows back now that they have been copied/merged */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1292 
1293 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1294 {
1295   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1296 
1297   PetscFunctionBegin;
1298   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1299   baij->getrowactive = PETSC_FALSE;
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1304 {
1305   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1306   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1307 
1308   PetscFunctionBegin;
1309   aA->getrow_utriangular = PETSC_TRUE;
1310   PetscFunctionReturn(0);
1311 }
1312 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1315   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1316 
1317   PetscFunctionBegin;
1318   aA->getrow_utriangular = PETSC_FALSE;
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1323 {
1324 #if defined(PETSC_USE_COMPLEX)
1325   PetscErrorCode ierr;
1326   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
1327 
1328   PetscFunctionBegin;
1329   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1330   ierr = MatConjugate(a->B);CHKERRQ(ierr);
1331 #else
1332   PetscFunctionBegin;
1333 #endif
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1338 {
1339   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1340   PetscErrorCode ierr;
1341 
1342   PetscFunctionBegin;
1343   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1344   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1349 {
1350   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1351   PetscErrorCode ierr;
1352 
1353   PetscFunctionBegin;
1354   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1355   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1360    Input: isrow       - distributed(parallel),
1361           iscol_local - locally owned (seq)
1362 */
1363 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1364 {
1365   PetscErrorCode ierr;
1366   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1367   const PetscInt *ptr1,*ptr2;
1368 
1369   PetscFunctionBegin;
1370   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1371   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1372   if (sz1 > sz2) {
1373     *flg = PETSC_FALSE;
1374     PetscFunctionReturn(0);
1375   }
1376 
1377   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1378   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1379 
1380   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1381   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1382   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1383   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1384   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1385   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1386 
1387   nmatch=0;
1388   k     = 0;
1389   for (i=0; i<sz1; i++){
1390     for (j=k; j<sz2; j++){
1391       if (a1[i] == a2[j]) {
1392         k = j; nmatch++;
1393         break;
1394       }
1395     }
1396   }
1397   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1398   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1399   ierr = PetscFree(a1);CHKERRQ(ierr);
1400   ierr = PetscFree(a2);CHKERRQ(ierr);
1401   if (nmatch < sz1) {
1402     *flg = PETSC_FALSE;
1403   } else {
1404     *flg = PETSC_TRUE;
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1410 {
1411   PetscErrorCode ierr;
1412   IS             iscol_local;
1413   PetscInt       csize;
1414   PetscBool      isequal;
1415 
1416   PetscFunctionBegin;
1417   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1418   if (call == MAT_REUSE_MATRIX) {
1419     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1420     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1421   } else {
1422     PetscBool issorted;
1423 
1424     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1425     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1426     ierr = ISSorted(iscol_local, &issorted);CHKERRQ(ierr);
1427     if (!isequal || !issorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted");
1428   }
1429 
1430   /* now call MatCreateSubMatrix_MPIBAIJ() */
1431   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1432   if (call == MAT_INITIAL_MATRIX) {
1433     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1434     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1435   }
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1440 {
1441   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1442   PetscErrorCode ierr;
1443 
1444   PetscFunctionBegin;
1445   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1446   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1451 {
1452   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1453   Mat            A  = a->A,B = a->B;
1454   PetscErrorCode ierr;
1455   PetscLogDouble isend[5],irecv[5];
1456 
1457   PetscFunctionBegin;
1458   info->block_size = (PetscReal)matin->rmap->bs;
1459 
1460   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1461 
1462   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1463   isend[3] = info->memory;  isend[4] = info->mallocs;
1464 
1465   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1466 
1467   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1468   isend[3] += info->memory;  isend[4] += info->mallocs;
1469   if (flag == MAT_LOCAL) {
1470     info->nz_used      = isend[0];
1471     info->nz_allocated = isend[1];
1472     info->nz_unneeded  = isend[2];
1473     info->memory       = isend[3];
1474     info->mallocs      = isend[4];
1475   } else if (flag == MAT_GLOBAL_MAX) {
1476     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1477 
1478     info->nz_used      = irecv[0];
1479     info->nz_allocated = irecv[1];
1480     info->nz_unneeded  = irecv[2];
1481     info->memory       = irecv[3];
1482     info->mallocs      = irecv[4];
1483   } else if (flag == MAT_GLOBAL_SUM) {
1484     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1485 
1486     info->nz_used      = irecv[0];
1487     info->nz_allocated = irecv[1];
1488     info->nz_unneeded  = irecv[2];
1489     info->memory       = irecv[3];
1490     info->mallocs      = irecv[4];
1491   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1492   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1493   info->fill_ratio_needed = 0;
1494   info->factor_mallocs    = 0;
1495   PetscFunctionReturn(0);
1496 }
1497 
/*
   MatSetOption_MPISBAIJ - Handles MatSetOption() for MPISBAIJ matrices.

   Most options are forwarded to the sequential diagonal block (a->A) and,
   where it also applies, to the off-diagonal block (a->B).  A few options
   (stashing, hash table, lower-triangular handling) only set flags on the
   parallel or sequential data structures.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* These options are meaningful for both the diagonal (SBAIJ) and
     off-diagonal (BAIJ) blocks, so forward to both. */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* record the orientation for MatSetValues() on the parallel object too */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, MatSetValues() drops entries destined for other processes */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      /* a complex Hermitian matrix is not symmetric, so the default
         symmetric kernels cannot be used and transpose products are
         disabled */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric = PETSC_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      /* undo any kernel swap performed by MAT_HERMITIAN above; for a
         symmetric matrix A^T x == A x */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    /* SBAIJ is symmetric by construction; it cannot stop being symmetric */
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): this writes the same ignore_ltriangular flag as
       MAT_IGNORE_LOWER_TRIANGULAR rather than a separate "error" flag --
       confirm this is intentional */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1581 
1582 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1583 {
1584   PetscErrorCode ierr;
1585 
1586   PetscFunctionBegin;
1587   if (reuse == MAT_INITIAL_MATRIX) {
1588     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1589   }  else if (reuse == MAT_REUSE_MATRIX) {
1590     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1591   }
1592   PetscFunctionReturn(0);
1593 }
1594 
/*
   MatDiagonalScale_MPISBAIJ - Scales an MPISBAIJ matrix as diag(ll)*A*diag(rr).

   To keep the matrix symmetric the left and right scaling vectors must be the
   same vector (or equal).  The scatter of rr into the local work vector is
   overlapped with the left scaling of the off-diagonal block and the scaling
   of the diagonal block.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  /* symmetry requires ll == rr (same object or equal entries) */
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  /* with ll == rr established, a NULL ll means no scaling at all */
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start communicating the off-process entries of rr needed to right-scale
     the off-diagonal block; local work proceeds while it is in flight */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1629 
1630 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1631 {
1632   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1633   PetscErrorCode ierr;
1634 
1635   PetscFunctionBegin;
1636   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1641 
1642 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1643 {
1644   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1645   Mat            a,b,c,d;
1646   PetscBool      flg;
1647   PetscErrorCode ierr;
1648 
1649   PetscFunctionBegin;
1650   a = matA->A; b = matA->B;
1651   c = matB->A; d = matB->B;
1652 
1653   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1654   if (flg) {
1655     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1656   }
1657   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1662 {
1663   PetscErrorCode ierr;
1664   PetscBool      isbaij;
1665 
1666   PetscFunctionBegin;
1667   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1668   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1669   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1670   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1671     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1672     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1673     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1674   } else {
1675     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1676     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1677 
1678     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1679     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1680   }
1681   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
1684 
/*
   MatSetUp_MPISBAIJ - Default setup: preallocates the matrix with
   PETSC_DEFAULT per-row estimates, using the block size already attached to
   the row layout.
*/
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1693 
/*
   MatAXPY_MPISBAIJ - Computes Y = a*X + Y.

   Three cases:
   - SAME_NONZERO_PATTERN:   the nonzero arrays line up, so a raw BLAS axpy is
                             applied to the values of both blocks.
   - SUBSET_NONZERO_PATTERN: generic entry-wise update with full upper
                             triangular rows exposed on X.
   - otherwise:              a new matrix with the union pattern is
                             preallocated, filled, and header-swapped into Y.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: the value arrays are conformal, axpy them directly */
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    /* unknown relation between patterns: build a matrix B holding the union
       pattern, compute B = a*X + Y there, then replace Y's innards with B's */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    /* per-row counts of the union pattern, split diagonal/off-diagonal */
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* B's contents become Y's; B itself is consumed */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1743 
1744 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1745 {
1746   PetscErrorCode ierr;
1747   PetscInt       i;
1748   PetscBool      flg;
1749 
1750   PetscFunctionBegin;
1751   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1752   for (i=0; i<n; i++) {
1753     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1754     if (!flg) {
1755       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1756     }
1757   }
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1762 {
1763   PetscErrorCode ierr;
1764   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1765   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1766 
1767   PetscFunctionBegin;
1768   if (!Y->preallocated) {
1769     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1770   } else if (!aij->nz) {
1771     PetscInt nonew = aij->nonew;
1772     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1773     aij->nonew = nonew;
1774   }
1775   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1780 {
1781   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1782   PetscErrorCode ierr;
1783 
1784   PetscFunctionBegin;
1785   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1786   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1787   if (d) {
1788     PetscInt rstart;
1789     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1790     *d += rstart/A->rmap->bs;
1791 
1792   }
1793   PetscFunctionReturn(0);
1794 }
1795 
/*
   MatGetDiagonalBlock_MPISBAIJ - Returns the sequential SBAIJ matrix holding
   this process's diagonal block.  No new reference is taken; the caller must
   not destroy the returned matrix.
*/
PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}
1802 
/* -------------------------------------------------------------------*/
/* Operation table for MATMPISBAIJ.  Entries are POSITIONAL -- they must match
   the slot order of struct _MatOps in matimpl.h exactly; never reorder.  NULL
   marks an unsupported operation.  The transpose multiply slots reuse the
   plain multiply kernels because the matrix is symmetric. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       NULL,
                               /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};
1950 
1951 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1952 {
1953   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1954   PetscErrorCode ierr;
1955   PetscInt       i,mbs,Mbs;
1956   PetscMPIInt    size;
1957 
1958   PetscFunctionBegin;
1959   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1960   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1961   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1962   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1963   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
1964   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
1965 
1966   mbs = B->rmap->n/bs;
1967   Mbs = B->rmap->N/bs;
1968   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1969 
1970   B->rmap->bs = bs;
1971   b->bs2      = bs*bs;
1972   b->mbs      = mbs;
1973   b->Mbs      = Mbs;
1974   b->nbs      = B->cmap->n/bs;
1975   b->Nbs      = B->cmap->N/bs;
1976 
1977   for (i=0; i<=b->size; i++) {
1978     b->rangebs[i] = B->rmap->range[i]/bs;
1979   }
1980   b->rstartbs = B->rmap->rstart/bs;
1981   b->rendbs   = B->rmap->rend/bs;
1982 
1983   b->cstartbs = B->cmap->rstart/bs;
1984   b->cendbs   = B->cmap->rend/bs;
1985 
1986 #if defined(PETSC_USE_CTABLE)
1987   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
1988 #else
1989   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
1990 #endif
1991   ierr = PetscFree(b->garray);CHKERRQ(ierr);
1992   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
1993   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
1994   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
1995   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
1996   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
1997   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
1998   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
1999   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2000 
2001   /* Because the B will have been resized we simply destroy it and create a new one each time */
2002   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2003   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2004   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2005   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2006   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2007   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2008 
2009   if (!B->preallocated) {
2010     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2011     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2012     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2013     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2014     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2015   }
2016 
2017   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2018   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2019 
2020   B->preallocated  = PETSC_TRUE;
2021   B->was_assembled = PETSC_FALSE;
2022   B->assembled     = PETSC_FALSE;
2023   PetscFunctionReturn(0);
2024 }
2025 
/*
   MatMPISBAIJSetPreallocationCSR_MPISBAIJ - Preallocates and fills an
   MPISBAIJ matrix from local block-CSR arrays (ii row offsets, jj block
   column indices, optional block values V).

   For each local block row the columns are scanned once: entries strictly
   below the diagonal (bd) are counted so they can be excluded, then entries
   up to the end of the local diagonal column range (d) are counted; the rest
   are off-diagonal.  After preallocation the values are inserted with
   MatSetValuesBlocked, row-at-a-time when the memory layout permits.
*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cend;
  PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
  const PetscInt *JJ    =NULL;
  PetscScalar    *values=NULL;
  PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all quantities below are in units of blocks */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ     = jj + ii[i];
    bd     = 0;
    /* bd = entries strictly below the diagonal; relies on sorted columns */
    for (j=0; j<nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d  = 0;
    /* d = entries from the diagonal up to the end of the local column range */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;
    /* nz_max tracks the longest upper-triangular row, sizing the zero buffer */
    nz       = nz - bd;
    nz_max = PetscMax(nz_max,nz);
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros to establish the nonzero pattern */
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* all entries were local, so skip the off-process communication phase */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2104 
2105 /*MC
2106    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2107    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2108    the matrix is stored.
2109 
2110    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2111    can call MatSetOption(Mat, MAT_HERMITIAN);
2112 
2113    Options Database Keys:
2114 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2115 
2116    Notes:
     The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
     diagonal portion of the matrix on each process has to be less than or equal to the number of columns.
2119 
2120    Level: beginner
2121 
2122 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2123 M*/
2124 
/*
   MatCreate_MPISBAIJ - Constructor for the MATMPISBAIJ type.

   Allocates the type-specific data structure, installs the operation table,
   initializes all cached fields to their empty state, composes the
   type-specific and conversion functions, marks the matrix symmetric, and
   processes the hash-table options.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  /* compose type-specific entry points reachable via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);

  /* SBAIJ matrices are symmetric by construction; with complex scalars they
     are symmetric (not Hermitian) by default, see the MATMPISBAIJ man page */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  B->symmetric_eternal          = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian                  = PETSC_FALSE;
  B->hermitian_set              = PETSC_FALSE;
#else
  B->hermitian                  = PETSC_TRUE;
  B->hermitian_set              = PETSC_TRUE;
#endif

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    /* factors <= 1 would make the table no larger than the data; reject them */
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2229 
2230 /*MC
2231    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2232 
2233    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2234    and MATMPISBAIJ otherwise.
2235 
2236    Options Database Keys:
2237 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2238 
2239   Level: beginner
2240 
.seealso: MatCreateSBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
2242 M*/
2243 
2244 /*@C
2245    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2246    the user should preallocate the matrix storage by setting the parameters
2247    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2248    performance can be increased by more than a factor of 50.
2249 
2250    Collective on Mat
2251 
2252    Input Parameters:
2253 +  B - the matrix
2254 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2255           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2256 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2257            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
2262 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2263            submatrix (same for all local rows).
2264 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2265            off-diagonal portion of the local submatrix that is right of the diagonal
2266            (possibly different for each block row) or NULL.
2267 
2268 
2269    Options Database Keys:
2270 +   -mat_no_unroll - uses code that does not unroll the loops in the
2271                      block calculations (much slower)
2272 -   -mat_block_size - size of the blocks to use
2273 
2274    Notes:
2275 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2278 
2279    If the *_nnz parameter is given then the *_nz parameter is ignored
2280 
2281    Storage Information:
2282    For a square global matrix we define each processor's diagonal portion
2283    to be its local rows and the corresponding columns (a square submatrix);
2284    each processor's off-diagonal portion encompasses the remainder of the
2285    local matrix (a rectangular submatrix).
2286 
2287    The user can specify preallocated storage for the diagonal part of
2288    the local submatrix with either d_nz or d_nnz (not both).  Set
2289    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2290    memory allocation.  Likewise, specify preallocated storage for the
2291    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2292 
2293    You can call MatGetInfo() to get information on how effective the preallocation was;
2294    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2295    You can also run with the option -info and look for messages with the string
2296    malloc in them to see if additional memory allocation was needed.
2297 
2298    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2299    the figure below we depict these three local rows and all columns (0-11).
2300 
2301 .vb
2302            0 1 2 3 4 5 6 7 8 9 10 11
2303           --------------------------
2304    row 3  |. . . d d d o o o o  o  o
2305    row 4  |. . . d d d o o o o  o  o
2306    row 5  |. . . d d d o o o o  o  o
2307           --------------------------
2308 .ve
2309 
2310    Thus, any entries in the d locations are stored in the d (diagonal)
2311    submatrix, and any entries in the o locations are stored in the
2312    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2313    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2314 
2315    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2316    plus the diagonal part of the d matrix,
2317    and o_nz should indicate the number of block nonzeros per row in the o matrix
2318 
2319    In general, for PDE problems in which most nonzeros are near the diagonal,
2320    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2321    or you will get TERRIBLE performance; see the users' manual chapter on
2322    matrices.
2323 
2324    Level: intermediate
2325 
2326 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2327 @*/
2328 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2329 {
2330   PetscErrorCode ierr;
2331 
2332   PetscFunctionBegin;
2333   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2334   PetscValidType(B,1);
2335   PetscValidLogicalCollectiveInt(B,bs,2);
2336   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 /*@C
2341    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2342    (block compressed row).  For good matrix assembly performance
2343    the user should preallocate the matrix storage by setting the parameters
2344    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2345    performance can be increased by more than a factor of 50.
2346 
2347    Collective
2348 
2349    Input Parameters:
2350 +  comm - MPI communicator
2351 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2352           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2353 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2354            This value should be the same as the local size used in creating the
2355            y vector for the matrix-vector product y = Ax.
2356 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2357            This value should be the same as the local size used in creating the
2358            x vector for the matrix-vector product y = Ax.
2359 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2360 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2361 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2362            submatrix  (same for all local rows)
2363 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.
2366            If you plan to factor the matrix you must leave room for the diagonal entry and
2367            set its value even if it is zero.
2368 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2369            submatrix (same for all local rows).
2370 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2371            off-diagonal portion of the local submatrix (possibly different for
2372            each block row) or NULL.
2373 
2374    Output Parameter:
2375 .  A - the matrix
2376 
2377    Options Database Keys:
2378 +   -mat_no_unroll - uses code that does not unroll the loops in the
2379                      block calculations (much slower)
2380 .   -mat_block_size - size of the blocks to use
2381 -   -mat_mpi - use the parallel matrix data structures even on one processor
2382                (defaults to using SeqBAIJ format on one processor)
2383 
2384    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2385    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2386    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2387 
2388    Notes:
2389    The number of rows and columns must be divisible by blocksize.
2390    This matrix type does not support complex Hermitian operation.
2391 
2392    The user MUST specify either the local or global matrix dimensions
2393    (possibly both).
2394 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2397 
2398    If the *_nnz parameter is given then the *_nz parameter is ignored
2399 
2400    Storage Information:
2401    For a square global matrix we define each processor's diagonal portion
2402    to be its local rows and the corresponding columns (a square submatrix);
2403    each processor's off-diagonal portion encompasses the remainder of the
2404    local matrix (a rectangular submatrix).
2405 
2406    The user can specify preallocated storage for the diagonal part of
2407    the local submatrix with either d_nz or d_nnz (not both).  Set
2408    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2409    memory allocation.  Likewise, specify preallocated storage for the
2410    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2411 
2412    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2413    the figure below we depict these three local rows and all columns (0-11).
2414 
2415 .vb
2416            0 1 2 3 4 5 6 7 8 9 10 11
2417           --------------------------
2418    row 3  |. . . d d d o o o o  o  o
2419    row 4  |. . . d d d o o o o  o  o
2420    row 5  |. . . d d d o o o o  o  o
2421           --------------------------
2422 .ve
2423 
2424    Thus, any entries in the d locations are stored in the d (diagonal)
2425    submatrix, and any entries in the o locations are stored in the
2426    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2427    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2428 
2429    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2430    plus the diagonal part of the d matrix,
2431    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2432    In general, for PDE problems in which most nonzeros are near the diagonal,
2433    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2434    or you will get TERRIBLE performance; see the users' manual chapter on
2435    matrices.
2436 
2437    Level: intermediate
2438 
2439 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2440 @*/
2441 
2442 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2443 {
2444   PetscErrorCode ierr;
2445   PetscMPIInt    size;
2446 
2447   PetscFunctionBegin;
2448   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2449   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2450   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
2451   if (size > 1) {
2452     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2453     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2454   } else {
2455     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2456     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2457   }
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 
/*
   Duplicates an MPISBAIJ matrix: creates a new Mat with the same sizes,
   type, and (referenced) layouts, copies the scalar bookkeeping fields of
   the Mat_MPISBAIJ data structure, deep-copies colmap/garray, rebuilds the
   communication work vectors, shares the sMvctx scatter by reference, and
   duplicates the diagonal (A) and off-diagonal (B) blocks per cpvalues.
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = NULL;

  /* create the shell: same sizes and type; row/column layouts are shared
     by reference rather than rebuilt */
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* the duplicate is immediately usable: marked preallocated and assembled */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-count bookkeeping (local/global block dimensions) */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  /* per-process info and ownership ranges; MatGetRow() scratch state is
     reset rather than copied */
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration (flag/factor) but start with
     an empty table and zeroed counters */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* deep-copy the block ownership ranges and (if present) the global-to-local
     column map; the map is a PetscTable or a plain array depending on build */
  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;

  /* deep-copy garray (local-to-global column map of the off-diagonal block) */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = NULL;

  /* fresh stash plus duplicated communication vectors/scatter */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) alias pieces of slvec1/slvec0 storage:
     the first bs*mbs entries are the "a" part, the remainder the "b" part */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* finally duplicate the two sequential blocks (values copied or not per
     cpvalues) and the composed-function list */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2568 
2569 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2570 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2571 
2572 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2573 {
2574   PetscErrorCode ierr;
2575   PetscBool      isbinary;
2576 
2577   PetscFunctionBegin;
2578   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2579   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2580   ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 /*XXXXX@
2585    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2586 
2587    Input Parameters:
2588 .  mat  - the matrix
2589 .  fact - factor
2590 
2591    Not Collective on Mat, each process can have a different hash factor
2592 
2593    Level: advanced
2594 
2595   Notes:
2596    This can also be set by the command line option: -mat_use_hash_table fact
2597 
2598 .seealso: MatSetOption()
2599 @XXXXX*/
2600 
2601 
/*
   Computes v[row] = max_j |A(row,j)| for an MPISBAIJ matrix (the idx output
   is not supported).  The diagonal block contributes via MatGetRowMaxAbs on
   a->A.  Because only the upper triangle is stored, each entry of the
   off-diagonal block B at (row,col) also represents the symmetric entry
   (col,row) whose row is owned by a later rank: per-column maxima are
   accumulated in work[] and shipped to those higher ranks with plain
   MPI_Send/MPI_Recv.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* start with the row maxima of the diagonal block, written into v */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;   /* values, row pointers and column indices of block B */
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed: one slot per global
     (point) row, holding this rank's column maxima from B */
  ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);

  /* row_max for B: the last rank has an empty B (nothing right of its
     diagonal block), so it neither scans nor sends */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;                /* local col index */
          col += rowners_bs[rank+1];      /* global col index */
          /* NOTE(review): this assumes B's stored block column indices are
             offsets past this rank's ownership range (not garray-compacted)
             -- confirm against the assembly path */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            /* entry contributes to this local row ... */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            /* ... and, by symmetry, to the remote row == global col */
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners: every higher rank gets the slice of work[]
       covering its own rows */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
    }
  }

  /* receive values: rank r gets one message from each lower rank; all
     messages have the same length (this rank's local row count), so
     MPI_ANY_SOURCE/MPI_ANY_TAG is safe here */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRMPI(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2683 
/*
   SOR/SSOR relaxation for MPISBAIJ; only block size 1 is supported.
   Supported MatSORType cases:
     - SOR_APPLY_UPPER and the one-shot local forward/backward sweeps are
       forwarded to the diagonal block's SOR;
     - SOR_LOCAL_SYMMETRIC_SWEEP iterates, each pass forming
       bb1 = bb - B*x (using the slvec0/slvec1 scatter pipeline, which also
       accounts for the transposed lower-triangular part) and then running a
       local symmetric sweep on the diagonal block;
     - SOR_EISENSTAT implements the Eisenstat trick with cached work vectors
       mat->xx1/mat->bb1 and a cached diagonal.
   Anything else raises PETSC_ERR_SUP.
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero initial guess the first pass needs no off-process data */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      /* negate so the scatter below subtracts B-related terms from bb */
      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* accumulate remote contributions; arrays are restored between
         Begin and End so the local copies above are already complete */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily create and cache the Eisenstat work vectors on the matrix */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* NOTE(review): bb1 produced by MatMultDiagonalBlock() is not read
         afterwards, and VecAYPX updates slvec1a from bb instead -- confirm
         whether slvec1a was intended to receive the diagonal-block product */
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      /* omega == 1 collapses scale to -1, saving one multiply per entry */
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2815 
2816 /*@
2817      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2818          CSR format the local rows.
2819 
2820    Collective
2821 
2822    Input Parameters:
2823 +  comm - MPI communicator
2824 .  bs - the block size, only a block size of 1 is supported
2825 .  m - number of local rows (Cannot be PETSC_DECIDE)
2826 .  n - This value should be the same as the local size used in creating the
2827        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2828        calculated if N is given) For square matrices n is almost always m.
2829 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2830 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2831 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2832 .   j - column indices
2833 -   a - matrix values
2834 
2835    Output Parameter:
2836 .   mat - the matrix
2837 
2838    Level: intermediate
2839 
2840    Notes:
2841        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2842      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2843      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2844 
2845        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2846 
2847 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2848           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2849 @*/
2850 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2851 {
2852   PetscErrorCode ierr;
2853 
2854 
2855   PetscFunctionBegin;
2856   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2857   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2858   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2859   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2860   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2861   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 
2866 /*@C
2867    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2868 
2869    Collective
2870 
2871    Input Parameters:
2872 +  B - the matrix
2873 .  bs - the block size
2874 .  i - the indices into j for the start of each local row (starts with zero)
2875 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2876 -  v - optional values in the matrix
2877 
2878    Level: advanced
2879 
2880    Notes:
2881    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2882    and usually the numerical values as well
2883 
2884    Any entries below the diagonal are ignored
2885 
2886 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2887 @*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the implementation composed on B under this name; PetscTryMethod is a no-op for matrix types that do not provide it */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2896 
2897 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2898 {
2899   PetscErrorCode ierr;
2900   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2901   PetscInt       *indx;
2902   PetscScalar    *values;
2903 
2904   PetscFunctionBegin;
2905   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2906   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2907     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2908     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2909     PetscInt       *bindx,rmax=a->rmax,j;
2910     PetscMPIInt    rank,size;
2911 
2912     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2913     mbs = m/bs; Nbs = N/cbs;
2914     if (n == PETSC_DECIDE) {
2915       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
2916     }
2917     nbs = n/cbs;
2918 
2919     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2920     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
2921 
2922     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
2923     ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr);
2924     if (rank == size-1) {
2925       /* Check sum(nbs) = Nbs */
2926       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
2927     }
2928 
2929     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2930     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2931     for (i=0; i<mbs; i++) {
2932       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2933       nnz  = nnz/bs;
2934       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2935       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2936       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2937     }
2938     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2939     ierr = PetscFree(bindx);CHKERRQ(ierr);
2940 
2941     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2942     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2943     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2944     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
2945     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
2946     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2947     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2948   }
2949 
2950   /* numeric phase */
2951   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2952   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2953 
2954   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2955   for (i=0; i<m; i++) {
2956     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2957     Ii   = i + rstart;
2958     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2959     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2960   }
2961   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2962   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2963   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
2966