/* petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 94ef8dde638caef1d0cd84a7dc8a2db65fcda8b6) */

#include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }

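/*
   Both macros above insert one scalar into the block-CSR storage of the local
   diagonal part (A, SeqSBAIJ) or off-diagonal part (B, SeqBAIJ): a short
   binary search narrows the range of block columns, a linear scan locates the
   block, and if the block is missing (and new nonzeros are allowed) the later
   entries of the block row are shifted up to make room for it.
*/
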
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
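
/*
   A minimal usage sketch (not part of this file; indices and values are
   illustrative only): since MPISBAIJ stores only the upper triangle, either
   set only entries with i <= j or ask the matrix to drop lower-triangular
   input.

     Mat         mat;           // created and preallocated elsewhere as MATMPISBAIJ
     PetscInt    i = 0, j = 1;
     PetscScalar v = 2.0;
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/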

PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++  = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
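
/*
   Note on the copy loops above: each bs*bs block is stored column-major, so
   for row-oriented input the stride pattern (jj=ii; jj<bs2; jj+=bs) transposes
   v into the block, while column-oriented input is copied contiguously.
*/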

/*
   This routine is exactly duplicated in mpibaij.c
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  low  = 0;
  high = nrow;
  value = v;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
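
/*
   A minimal usage sketch (not part of this file; block indices and values are
   illustrative only): insert one bs*bs block at block row/column (ib,jb) with
   ib <= jb.

     PetscInt    ib = 0, jb = 2;
     PetscScalar vblock[4] = {1.0,0.0,0.0,1.0};   // bs = 2, row-oriented input
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,vblock,INSERT_VALUES);CHKERRQ(ierr);
*/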

PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
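
/*
   For NORM_FROBENIUS above, each process squares the norms of its diagonal
   part A and off-diagonal part B; the entries of B appear twice in the full
   symmetric matrix (once in each triangle), so the reduction computes
   ||mat||_F = sqrt( sum_p ||A_p||_F^2 + 2 * sum_p ||B_p||_F^2 ).
*/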

PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
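
/*
   Assembly pipeline: MatAssemblyBegin_MPISBAIJ() launches the scatter of both
   stashes (the scalar stash and the block stash); MatAssemblyEnd_MPISBAIJ()
   drains the received messages row by row into MatSetValues_MPISBAIJ() /
   MatSetValuesBlocked_MPISBAIJ(), then assembles the local A and B parts and,
   on the first final assembly, builds the scatter contexts via
   MatSetUpMultiply_MPISBAIJ().
*/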

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row  */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices; include them for every row, not just one per block row,
     since process 0 cannot reconstruct the per-row indices from a block row alone. This requires
     more communication, but still no more than the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
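
/*
   A minimal usage sketch (not part of this file): view an assembled MPISBAIJ
   matrix with the detailed-info ASCII format.

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/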

PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
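
/*
   The multiply above exploits symmetry with a single scatter: slvec1a is
   seeded with the diagonal product A_d*x, slvec0 packs [x ; B^T*x], the
   forward ADD_VALUES scatter both accumulates the B^T*x (subdiagonal)
   contributions into the owning rows of slvec1a and gathers the needed ghost
   values of x into slvec1b, and the final B*slvec1b + slvec1a adds the
   superdiagonal part to produce y.
*/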

PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1295 
1296 /*
1297   This only works correctly for square matrices where the subblock A->A is the
1298    diagonal block
1299 */
1300 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1301 {
1302   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1303   PetscErrorCode ierr;
1304 
1305   PetscFunctionBegin;
1306   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1307   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1318   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1323 {
1324   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1325   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1326   PetscErrorCode ierr;
1327   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1328   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1329   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1330 
1331   PetscFunctionBegin;
1332   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1333   mat->getrowactive = PETSC_TRUE;
1334 
1335   if (!mat->rowvalues && (idx || v)) {
1336     /*
1337         allocate enough space to hold information from the longest row.
1338     */
1339     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1340     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1341     PetscInt     max = 1,mbs = mat->mbs,tmp;
1342     for (i=0; i<mbs; i++) {
1343       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1344       if (max < tmp) max = tmp;
1345     }
1346     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1347   }
1348 
1349   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1350   lrow = row - brstart;  /* local row index */
1351 
1352   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1353   if (!v)   {pvA = 0; pvB = 0;}
1354   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1355   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1356   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1357   nztot = nzA + nzB;
1358 
1359   cmap = mat->garray;
1360   if (v  || idx) {
1361     if (nztot) {
1362       /* Sort by increasing column numbers, assuming A and B already sorted */
1363       PetscInt imark = -1;
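           /* imark counts the leading B (off-diagonal) entries whose global block
              column precedes this process's diagonal block; they must come before
              the A entries in the merged, column-sorted output */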
1364       if (v) {
1365         *v = v_p = mat->rowvalues;
1366         for (i=0; i<nzB; i++) {
1367           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1368           else break;
1369         }
1370         imark = i;
1371         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1372         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1373       }
1374       if (idx) {
1375         *idx = idx_p = mat->rowindices;
1376         if (imark > -1) {
1377           for (i=0; i<imark; i++) {
1378             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1379           }
1380         } else {
1381           for (i=0; i<nzB; i++) {
1382             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1383             else break;
1384           }
1385           imark = i;
1386         }
1387         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1388         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1389       }
1390     } else {
1391       if (idx) *idx = 0;
1392       if (v)   *v   = 0;
1393     }
1394   }
1395   *nz  = nztot;
1396   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1397   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1402 {
1403   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1404 
1405   PetscFunctionBegin;
1406   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1407   baij->getrowactive = PETSC_FALSE;
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1414   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1415 
1416   PetscFunctionBegin;
1417   aA->getrow_utriangular = PETSC_TRUE;
1418   PetscFunctionReturn(0);
1419 }
1420 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1421 {
1422   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1423   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1424 
1425   PetscFunctionBegin;
1426   aA->getrow_utriangular = PETSC_FALSE;
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1431 {
1432   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1437   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1442 {
1443   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1444   PetscErrorCode ierr;
1445 
1446   PetscFunctionBegin;
1447   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1448   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1453    Input: isrow       - distributed(parallel),
1454           iscol_local - locally owned (seq)
1455 */
1456 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1457 {
1458   PetscErrorCode ierr;
1459   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1460   const PetscInt *ptr1,*ptr2;
1461 
1462   PetscFunctionBegin;
1463   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1464   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1465   if (sz1 > sz2) {
1466     *flg = PETSC_FALSE;
1467     PetscFunctionReturn(0);
1468   }
1469 
1470   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1471   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1472 
1473   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1474   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1475   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1476   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1477   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1478   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1479 
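       /* scan the sorted copies: count how many entries of a1 also occur in a2;
          isrow is a subset of iscol_local exactly when all sz1 entries match */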
1480   nmatch=0;
1481   k     = 0;
1482   for (i=0; i<sz1; i++){
1483     for (j=k; j<sz2; j++){
1484       if (a1[i] == a2[j]) {
1485         k = j; nmatch++;
1486         break;
1487       }
1488     }
1489   }
1490   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1491   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1492   ierr = PetscFree(a1);CHKERRQ(ierr);
1493   ierr = PetscFree(a2);CHKERRQ(ierr);
1494   if (nmatch < sz1) {
1495     *flg = PETSC_FALSE;
1496   } else {
1497     *flg = PETSC_TRUE;
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1503 {
1504   PetscErrorCode ierr;
1505   IS             iscol_local;
1506   PetscInt       csize;
1507   PetscBool      isequal;
1508 
1509   PetscFunctionBegin;
1510   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1511   if (call == MAT_REUSE_MATRIX) {
1512     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1513     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1514   } else {
1515     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1516     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1517     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1518   }
1519 
1520   /* now call MatCreateSubMatrix_MPIBAIJ_Private() */
1521   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1522   if (call == MAT_INITIAL_MATRIX) {
1523     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1524     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1525   }
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1530 {
1531   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1532   PetscErrorCode ierr;
1533 
1534   PetscFunctionBegin;
1535   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1536   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1541 {
1542   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1543   Mat            A  = a->A,B = a->B;
1544   PetscErrorCode ierr;
1545   PetscReal      isend[5],irecv[5];
1546 
1547   PetscFunctionBegin;
1548   info->block_size = (PetscReal)matin->rmap->bs;
1549 
1550   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1551 
1552   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1553   isend[3] = info->memory;  isend[4] = info->mallocs;
1554 
1555   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1556 
1557   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1558   isend[3] += info->memory;  isend[4] += info->mallocs;
1559   if (flag == MAT_LOCAL) {
1560     info->nz_used      = isend[0];
1561     info->nz_allocated = isend[1];
1562     info->nz_unneeded  = isend[2];
1563     info->memory       = isend[3];
1564     info->mallocs      = isend[4];
1565   } else if (flag == MAT_GLOBAL_MAX) {
1566     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1567 
1568     info->nz_used      = irecv[0];
1569     info->nz_allocated = irecv[1];
1570     info->nz_unneeded  = irecv[2];
1571     info->memory       = irecv[3];
1572     info->mallocs      = irecv[4];
1573   } else if (flag == MAT_GLOBAL_SUM) {
1574     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1575 
1576     info->nz_used      = irecv[0];
1577     info->nz_allocated = irecv[1];
1578     info->nz_unneeded  = irecv[2];
1579     info->memory       = irecv[3];
1580     info->mallocs      = irecv[4];
1581   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1582   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1583   info->fill_ratio_needed = 0;
1584   info->factor_mallocs    = 0;
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1589 {
1590   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1591   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1592   PetscErrorCode ierr;
1593 
1594   PetscFunctionBegin;
1595   switch (op) {
1596   case MAT_NEW_NONZERO_LOCATIONS:
1597   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1598   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1599   case MAT_KEEP_NONZERO_PATTERN:
1600   case MAT_SUBMAT_SINGLEIS:
1601   case MAT_NEW_NONZERO_LOCATION_ERR:
1602     MatCheckPreallocated(A,1);
1603     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1604     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1605     break;
1606   case MAT_ROW_ORIENTED:
1607     MatCheckPreallocated(A,1);
1608     a->roworiented = flg;
1609 
1610     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1611     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1612     break;
1613   case MAT_NEW_DIAGONALS:
1614     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1615     break;
1616   case MAT_IGNORE_OFF_PROC_ENTRIES:
1617     a->donotstash = flg;
1618     break;
1619   case MAT_USE_HASH_TABLE:
1620     a->ht_flag = flg;
1621     break;
1622   case MAT_HERMITIAN:
1623     MatCheckPreallocated(A,1);
1624     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1625     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1626 
1627     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1628     break;
1629   case MAT_SPD:
1630     A->spd_set = PETSC_TRUE;
1631     A->spd     = flg;
1632     if (flg) {
1633       A->symmetric                  = PETSC_TRUE;
1634       A->structurally_symmetric     = PETSC_TRUE;
1635       A->symmetric_set              = PETSC_TRUE;
1636       A->structurally_symmetric_set = PETSC_TRUE;
1637     }
1638     break;
1639   case MAT_SYMMETRIC:
1640     MatCheckPreallocated(A,1);
1641     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1642     break;
1643   case MAT_STRUCTURALLY_SYMMETRIC:
1644     MatCheckPreallocated(A,1);
1645     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1646     break;
1647   case MAT_SYMMETRY_ETERNAL:
1648     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1649     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1650     break;
1651   case MAT_IGNORE_LOWER_TRIANGULAR:
1652     aA->ignore_ltriangular = flg;
1653     break;
1654   case MAT_ERROR_LOWER_TRIANGULAR:
1655     aA->ignore_ltriangular = flg;
1656     break;
1657   case MAT_GETROW_UPPERTRIANGULAR:
1658     aA->getrow_utriangular = flg;
1659     break;
1660   default:
1661     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1662   }
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1667 {
1668   PetscErrorCode ierr;
1669 
1670   PetscFunctionBegin;
1671   if (reuse == MAT_INITIAL_MATRIX) {
1672     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1673   }  else if (reuse == MAT_REUSE_MATRIX) {
1674     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1675   }
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1680 {
1681   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1682   Mat            a     = baij->A, b=baij->B;
1683   PetscErrorCode ierr;
1684   PetscInt       nv,m,n;
1685   PetscBool      flg;
1686 
1687   PetscFunctionBegin;
1688   if (ll != rr) {
1689     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1690     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1691   }
1692   if (!ll) PetscFunctionReturn(0);
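       /* symmetric scaling D*A*D: the diagonal block is scaled on both sides locally,
          while B (which stores only the superdiagonal block) is scaled on the left by
          ll here and on the right by the scattered ghost values of rr below */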
1693 
1694   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1695   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1696 
1697   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1698   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
1699 
1700   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1701 
1702   /* left diagonalscale the off-diagonal part */
1703   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1704 
1705   /* scale the diagonal part */
1706   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1707 
1708   /* right diagonalscale the off-diagonal part */
1709   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1710   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1715 {
1716   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1717   PetscErrorCode ierr;
1718 
1719   PetscFunctionBegin;
1720   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1725 
1726 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1727 {
1728   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1729   Mat            a,b,c,d;
1730   PetscBool      flg;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   a = matA->A; b = matA->B;
1735   c = matB->A; d = matB->B;
1736 
1737   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1738   if (flg) {
1739     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1740   }
1741   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1746 {
1747   PetscErrorCode ierr;
1748   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1749   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1750 
1751   PetscFunctionBegin;
1752   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1753   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1754     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1755     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1756     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1757   } else {
1758     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1759     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1760   }
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1765 {
1766   PetscErrorCode ierr;
1767 
1768   PetscFunctionBegin;
1769   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1774 {
1775   PetscErrorCode ierr;
1776   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1777   PetscBLASInt   bnz,one=1;
1778   Mat_SeqSBAIJ   *xa,*ya;
1779   Mat_SeqBAIJ    *xb,*yb;
1780 
1781   PetscFunctionBegin;
1782   if (str == SAME_NONZERO_PATTERN) {
1783     PetscScalar alpha = a;
1784     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1785     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1786     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1787     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1788     xb   = (Mat_SeqBAIJ*)xx->B->data;
1789     yb   = (Mat_SeqBAIJ*)yy->B->data;
1790     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1791     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1792     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1793   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1794     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1795     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1796     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1797   } else {
1798     Mat      B;
1799     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1800     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
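         /* the patterns differ: preallocate a fresh matrix B for the union of the two
            nonzero patterns, compute B = a*X + Y entry by entry, then swap B's data
            into Y with MatHeaderReplace() */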
1801     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1802     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1803     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1804     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1805     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1806     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1807     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1808     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1809     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1810     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1811     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1812     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1813     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1814     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1815     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1816     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1817     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1818     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1819   }
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1824 {
1825   PetscErrorCode ierr;
1826   PetscInt       i;
1827   PetscBool      flg;
1828 
1829   PetscFunctionBegin;
1830   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1831   for (i=0; i<n; i++) {
1832     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1833     if (!flg) {
1834       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1835     }
1836   }
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1841 {
1842   PetscErrorCode ierr;
1843   Mat_MPISBAIJ   *maij = (Mat_MPISBAIJ*)Y->data;
1844   Mat_SeqSBAIJ   *aij  = (Mat_SeqSBAIJ*)maij->A->data;
1845 
1846   PetscFunctionBegin;
1847   if (!Y->preallocated) {
1848     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1849   } else if (!aij->nz) {
1850     PetscInt nonew = aij->nonew;
1851     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1852     aij->nonew = nonew;
1853   }
1854   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1859 {
1860   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1861   PetscErrorCode ierr;
1862 
1863   PetscFunctionBegin;
1864   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1865   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1866   if (d) {
1867     PetscInt rstart;
1868     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1869     *d += rstart/A->rmap->bs;
1870 
1871   }
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1876 {
1877   PetscFunctionBegin;
1878   *a = ((Mat_MPISBAIJ*)A->data)->A;
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 /* -------------------------------------------------------------------*/
1883 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1884                                        MatGetRow_MPISBAIJ,
1885                                        MatRestoreRow_MPISBAIJ,
1886                                        MatMult_MPISBAIJ,
1887                                /*  4*/ MatMultAdd_MPISBAIJ,
1888                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1889                                        MatMultAdd_MPISBAIJ,
1890                                        0,
1891                                        0,
1892                                        0,
1893                                /* 10*/ 0,
1894                                        0,
1895                                        0,
1896                                        MatSOR_MPISBAIJ,
1897                                        MatTranspose_MPISBAIJ,
1898                                /* 15*/ MatGetInfo_MPISBAIJ,
1899                                        MatEqual_MPISBAIJ,
1900                                        MatGetDiagonal_MPISBAIJ,
1901                                        MatDiagonalScale_MPISBAIJ,
1902                                        MatNorm_MPISBAIJ,
1903                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1904                                        MatAssemblyEnd_MPISBAIJ,
1905                                        MatSetOption_MPISBAIJ,
1906                                        MatZeroEntries_MPISBAIJ,
1907                                /* 24*/ 0,
1908                                        0,
1909                                        0,
1910                                        0,
1911                                        0,
1912                                /* 29*/ MatSetUp_MPISBAIJ,
1913                                        0,
1914                                        0,
1915                                        MatGetDiagonalBlock_MPISBAIJ,
1916                                        0,
1917                                /* 34*/ MatDuplicate_MPISBAIJ,
1918                                        0,
1919                                        0,
1920                                        0,
1921                                        0,
1922                                /* 39*/ MatAXPY_MPISBAIJ,
1923                                        MatCreateSubMatrices_MPISBAIJ,
1924                                        MatIncreaseOverlap_MPISBAIJ,
1925                                        MatGetValues_MPISBAIJ,
1926                                        MatCopy_MPISBAIJ,
1927                                /* 44*/ 0,
1928                                        MatScale_MPISBAIJ,
1929                                        MatShift_MPISBAIJ,
1930                                        0,
1931                                        0,
1932                                /* 49*/ 0,
1933                                        0,
1934                                        0,
1935                                        0,
1936                                        0,
1937                                /* 54*/ 0,
1938                                        0,
1939                                        MatSetUnfactored_MPISBAIJ,
1940                                        0,
1941                                        MatSetValuesBlocked_MPISBAIJ,
1942                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1943                                        0,
1944                                        0,
1945                                        0,
1946                                        0,
1947                                /* 64*/ 0,
1948                                        0,
1949                                        0,
1950                                        0,
1951                                        0,
1952                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1953                                        0,
1954                                        0,
1955                                        0,
1956                                        0,
1957                                /* 74*/ 0,
1958                                        0,
1959                                        0,
1960                                        0,
1961                                        0,
1962                                /* 79*/ 0,
1963                                        0,
1964                                        0,
1965                                        0,
1966                                        MatLoad_MPISBAIJ,
1967                                /* 84*/ 0,
1968                                        0,
1969                                        0,
1970                                        0,
1971                                        0,
1972                                /* 89*/ 0,
1973                                        0,
1974                                        0,
1975                                        0,
1976                                        0,
1977                                /* 94*/ 0,
1978                                        0,
1979                                        0,
1980                                        0,
1981                                        0,
1982                                /* 99*/ 0,
1983                                        0,
1984                                        0,
1985                                        0,
1986                                        0,
1987                                /*104*/ 0,
1988                                        MatRealPart_MPISBAIJ,
1989                                        MatImaginaryPart_MPISBAIJ,
1990                                        MatGetRowUpperTriangular_MPISBAIJ,
1991                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1992                                /*109*/ 0,
1993                                        0,
1994                                        0,
1995                                        0,
1996                                        MatMissingDiagonal_MPISBAIJ,
1997                                /*114*/ 0,
1998                                        0,
1999                                        0,
2000                                        0,
2001                                        0,
2002                                /*119*/ 0,
2003                                        0,
2004                                        0,
2005                                        0,
2006                                        0,
2007                                /*124*/ 0,
2008                                        0,
2009                                        0,
2010                                        0,
2011                                        0,
2012                                /*129*/ 0,
2013                                        0,
2014                                        0,
2015                                        0,
2016                                        0,
2017                                /*134*/ 0,
2018                                        0,
2019                                        0,
2020                                        0,
2021                                        0,
2022                                /*139*/ MatSetBlockSizes_Default,
2023                                        0,
2024                                        0,
2025                                        0,
2026                                        0,
2027                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2028 };
2029 
2030 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2031 {
2032   Mat_MPISBAIJ   *b;
2033   PetscErrorCode ierr;
2034   PetscInt       i,mbs,Mbs;
2035 
2036   PetscFunctionBegin;
2037   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2038   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2039   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2040   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2041 
2042   b   = (Mat_MPISBAIJ*)B->data;
2043   mbs = B->rmap->n/bs;
2044   Mbs = B->rmap->N/bs;
2045   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
2046 
2047   B->rmap->bs = bs;
2048   b->bs2      = bs*bs;
2049   b->mbs      = mbs;
2050   b->Mbs      = Mbs;
2051   b->nbs      = B->cmap->n/bs;
2052   b->Nbs      = B->cmap->N/bs;
2053 
2054   for (i=0; i<=b->size; i++) {
2055     b->rangebs[i] = B->rmap->range[i]/bs;
2056   }
2057   b->rstartbs = B->rmap->rstart/bs;
2058   b->rendbs   = B->rmap->rend/bs;
2059 
2060   b->cstartbs = B->cmap->rstart/bs;
2061   b->cendbs   = B->cmap->rend/bs;
2062 
2063 #if defined(PETSC_USE_CTABLE)
2064   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2065 #else
2066   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2067 #endif
2068   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2069   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2070   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2071   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2072   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2073   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2074   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2075   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2076   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2077 
2078   /* Because B may have been resized we simply destroy it and create a new one each time */
2079   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2080   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2081   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2082   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2083   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2084 
2085   if (!B->preallocated) {
2086     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2087     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2088     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2089     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2090     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2091   }
2092 
2093   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2094   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2095 
2096   B->preallocated  = PETSC_TRUE;
2097   B->was_assembled = PETSC_FALSE;
2098   B->assembled     = PETSC_FALSE;
2099   PetscFunctionReturn(0);
2100 }
2101 
2102 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2103 {
2104   PetscInt       m,rstart,cstart,cend;
2105   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2106   const PetscInt *JJ    =0;
2107   PetscScalar    *values=0;
2108   PetscErrorCode ierr;
2109 
2110   PetscFunctionBegin;
2111   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2112   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2113   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2114   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2115   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2116   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2117   m      = B->rmap->n/bs;
2118   rstart = B->rmap->rstart/bs;
2119   cstart = B->cmap->rstart/bs;
2120   cend   = B->cmap->rend/bs;
2121 
2122   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2123   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
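       /* split each block row between d_nnz (block columns in [cstart,cend), the
          diagonal portion) and o_nnz (the rest); the two scans below assume the
          column indices within each row are sorted */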
2124   for (i=0; i<m; i++) {
2125     nz = ii[i+1] - ii[i];
2126     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2127     nz_max = PetscMax(nz_max,nz);
2128     JJ     = jj + ii[i];
2129     for (j=0; j<nz; j++) {
2130       if (*JJ >= cstart) break;
2131       JJ++;
2132     }
2133     d = 0;
2134     for (; j<nz; j++) {
2135       if (*JJ++ >= cend) break;
2136       d++;
2137     }
2138     d_nnz[i] = d;
2139     o_nnz[i] = nz - d;
2140   }
2141   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2142   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2143 
2144   values = (PetscScalar*)V;
2145   if (!values) {
2146     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2147     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2148   }
2149   for (i=0; i<m; i++) {
2150     PetscInt          row    = i + rstart;
2151     PetscInt          ncols  = ii[i+1] - ii[i];
2152     const PetscInt    *icols = jj + ii[i];
2153     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2154     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2155   }
2156 
2157   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2158   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 /*MC
2165    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2166    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2167    the matrix is stored.
2168 
2169  For complex numbers this matrix is by default symmetric, NOT Hermitian. To make it Hermitian
2170  you can call MatSetOption(Mat,MAT_HERMITIAN).
2171 
2172    Options Database Keys:
2173 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2174 
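        Example of use (a minimal sketch; M, N, bs and the preallocation counts are
        illustrative placeholders):
     .vb
        Mat A;
        MatCreate(PETSC_COMM_WORLD,&A);
        MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
        MatSetType(A,MATMPISBAIJ);
        MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
        /* set only entries with column >= row; the lower triangle is implicit */
     .ve
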
2175   Level: beginner
2176 
2177 .seealso: MatCreateSBAIJ()
2178 M*/
2179 
2180 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2181 
2182 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2183 {
2184   Mat_MPISBAIJ   *b;
2185   PetscErrorCode ierr;
2186   PetscBool      flg = PETSC_FALSE;
2187 
2188   PetscFunctionBegin;
2189   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2190   B->data = (void*)b;
2191   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2192 
2193   B->ops->destroy = MatDestroy_MPISBAIJ;
2194   B->ops->view    = MatView_MPISBAIJ;
2195   B->assembled    = PETSC_FALSE;
2196   B->insertmode   = NOT_SET_VALUES;
2197 
2198   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2199   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2200 
2201   /* build local table of row and column ownerships */
2202   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2203 
2204   /* build cache for off array entries formed */
2205   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2206 
2207   b->donotstash  = PETSC_FALSE;
2208   b->colmap      = NULL;
2209   b->garray      = NULL;
2210   b->roworiented = PETSC_TRUE;
2211 
2212   /* stuff used in block assembly */
2213   b->barray = 0;
2214 
2215   /* stuff used for matrix vector multiply */
2216   b->lvec    = 0;
2217   b->Mvctx   = 0;
2218   b->slvec0  = 0;
2219   b->slvec0b = 0;
2220   b->slvec1  = 0;
2221   b->slvec1a = 0;
2222   b->slvec1b = 0;
2223   b->sMvctx  = 0;
2224 
2225   /* stuff for MatGetRow() */
2226   b->rowindices   = 0;
2227   b->rowvalues    = 0;
2228   b->getrowactive = PETSC_FALSE;
2229 
2230   /* hash table stuff */
2231   b->ht           = 0;
2232   b->hd           = 0;
2233   b->ht_size      = 0;
2234   b->ht_flag      = PETSC_FALSE;
2235   b->ht_fact      = 0;
2236   b->ht_total_ct  = 0;
2237   b->ht_insert_ct = 0;
2238 
2239   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2240   b->ijonly = PETSC_FALSE;
2241 
2242   b->in_loc = 0;
2243   b->v_loc  = 0;
2244   b->n_loc  = 0;
2245 
2246   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2247   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2248   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2250 #if defined(PETSC_HAVE_ELEMENTAL)
2251   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2252 #endif
2253 
2254   B->symmetric                  = PETSC_TRUE;
2255   B->structurally_symmetric     = PETSC_TRUE;
2256   B->symmetric_set              = PETSC_TRUE;
2257   B->structurally_symmetric_set = PETSC_TRUE;
2258 
2259   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2260   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2261   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2262   if (flg) {
2263     PetscReal fact = 1.39;
2264     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2265     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2266     if (fact <= 1.0) fact = 1.39;
2267     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2268     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2269   }
2270   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 /*MC
2275    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2276 
2277    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2278    and MATMPISBAIJ otherwise.
2279 
2280    Options Database Keys:
2281 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2282 
2283   Level: beginner
2284 
2285 .seealso: MatCreateSBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
2286 M*/
2287 
2288 /*@C
2289    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2290    the user should preallocate the matrix storage by setting the parameters
2291    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2292    performance can be increased by more than a factor of 50.
2293 
2294    Collective on Mat
2295 
2296    Input Parameters:
2297 +  B - the matrix
2298 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2299           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2300 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2301            submatrix  (same for all local rows)
2302 .  d_nnz - array containing the number of block nonzeros in the various block rows
2303            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2304            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2305            for the diagonal entry and set a value even if it is zero.
2306 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2307            submatrix (same for all local rows).
2308 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2309            off-diagonal portion of the local submatrix that is right of the diagonal
2310            (possibly different for each block row) or NULL.
2311 
2312 
2313    Options Database Keys:
2314 +   -mat_no_unroll - uses code that does not unroll the loops in the
2315                      block calculations (much slower)
2316 -   -mat_block_size - size of the blocks to use
2317 
2318    Notes:
2319 
2320    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2321    then it must be used on all processors that share the object for that argument.
2322 
2323    If the *_nnz parameter is given then the *_nz parameter is ignored
2324 
2325    Storage Information:
2326    For a square global matrix we define each processor's diagonal portion
2327    to be its local rows and the corresponding columns (a square submatrix);
2328    each processor's off-diagonal portion encompasses the remainder of the
2329    local matrix (a rectangular submatrix).
2330 
2331    The user can specify preallocated storage for the diagonal part of
2332    the local submatrix with either d_nz or d_nnz (not both).  Set
2333    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2334    memory allocation.  Likewise, specify preallocated storage for the
2335    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2336 
2337    You can call MatGetInfo() to get information on how effective the preallocation was;
2338    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2339    You can also run with the option -info and look for messages with the string
2340    malloc in them to see if additional memory allocation was needed.
2341 
2342    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2343    the figure below we depict these three local rows and all columns (0-11).
2344 
2345 .vb
2346            0 1 2 3 4 5 6 7 8 9 10 11
2347           --------------------------
2348    row 3  |. . . d d d o o o o  o  o
2349    row 4  |. . . d d d o o o o  o  o
2350    row 5  |. . . d d d o o o o  o  o
2351           --------------------------
2352 .ve
2353 
2354    Thus, any entries in the d locations are stored in the d (diagonal)
2355    submatrix, and any entries in the o locations are stored in the
2356    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2357    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2358 
2359    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2360    plus the diagonal part of the d matrix,
2361    and o_nz should indicate the number of block nonzeros per row in the o matrix
2362    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2363    In general, for PDE problems in which most nonzeros are near the diagonal,
2364    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2365    or you will get TERRIBLE performance; see the users' manual chapter on
2366    matrices.
2367 
2368    Level: intermediate
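        As a concrete sketch for the three rows depicted above with bs=1 (the values
        are simply read off the figure): the upper triangle of the d block contributes
        3, 2 and 1 entries and each row has 6 o entries, so one could call
     .vb
        PetscInt d_nnz[] = {3,2,1};
        PetscInt o_nnz[] = {6,6,6};
        MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);
     .ve
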
2369 
2370 .keywords: matrix, block, aij, compressed row, sparse, parallel
2371 
2372 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2373 @*/
2374 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2375 {
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2380   PetscValidType(B,1);
2381   PetscValidLogicalCollectiveInt(B,bs,2);
2382   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 /*@C
2387    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2388    (block compressed row).  For good matrix assembly performance
2389    the user should preallocate the matrix storage by setting the parameters
2390    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2391    performance can be increased by more than a factor of 50.
2392 
2393    Collective on MPI_Comm
2394 
2395    Input Parameters:
2396 +  comm - MPI communicator
2397 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2398           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2399 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2400            This value should be the same as the local size used in creating the
2401            y vector for the matrix-vector product y = Ax.
2402 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2403            This value should be the same as the local size used in creating the
2404            x vector for the matrix-vector product y = Ax.
2405 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2406 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2407 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2408            submatrix  (same for all local rows)
2409 .  d_nnz - array containing the number of block nonzeros in the various block rows
2410            in the upper triangular portion of the in diagonal portion of the local
2411            in the upper triangular portion of the diagonal portion of the local submatrix
2412            (possibly different for each block row) or NULL.
2413            set its value even if it is zero.
2414 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2415            submatrix (same for all local rows).
2416 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2417            off-diagonal portion of the local submatrix (possibly different for
2418            each block row) or NULL.
2419 
2420    Output Parameter:
2421 .  A - the matrix
2422 
2423    Options Database Keys:
2424 +   -mat_no_unroll - uses code that does not unroll the loops in the
2425                      block calculations (much slower)
2426 .   -mat_block_size - size of the blocks to use
2427 -   -mat_mpi - use the parallel matrix data structures even on one processor
2428                (defaults to using SeqSBAIJ format on one processor)
2429 
2430    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2431    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2432    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2433 
2434    Notes:
2435    The number of rows and columns must be divisible by blocksize.
2436    This matrix type does not support complex Hermitian operations.
2437 
2438    The user MUST specify either the local or global matrix dimensions
2439    (possibly both).
2440 
2441    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2442    then it must be used on all processors that share the object for that argument.
2443 
2444    If the *_nnz parameter is given then the *_nz parameter is ignored
2445 
2446    Storage Information:
2447    For a square global matrix we define each processor's diagonal portion
2448    to be its local rows and the corresponding columns (a square submatrix);
2449    each processor's off-diagonal portion encompasses the remainder of the
2450    local matrix (a rectangular submatrix).
2451 
2452    The user can specify preallocated storage for the diagonal part of
2453    the local submatrix with either d_nz or d_nnz (not both).  Set
2454    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2455    memory allocation.  Likewise, specify preallocated storage for the
2456    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2457 
2458    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2459    the figure below we depict these three local rows and all columns (0-11).
2460 
2461 .vb
2462            0 1 2 3 4 5 6 7 8 9 10 11
2463           --------------------------
2464    row 3  |. . . d d d o o o o  o  o
2465    row 4  |. . . d d d o o o o  o  o
2466    row 5  |. . . d d d o o o o  o  o
2467           --------------------------
2468 .ve
2469 
2470    Thus, any entries in the d locations are stored in the d (diagonal)
2471    submatrix, and any entries in the o locations are stored in the
2472    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2473    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2474 
2475    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2476    plus the diagonal part of the d matrix,
2477    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2478    In general, for PDE problems in which most nonzeros are near the diagonal,
2479    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2480    or you will get TERRIBLE performance; see the users' manual chapter on
2481    matrices.
2482 
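        Example of use (a sketch; the block size, global sizes M and N, which must be
        divisible by the block size, and the preallocation counts are illustrative
        placeholders only):
     .vb
        Mat A;
        MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,NULL,2,NULL,&A);
        /* ... MatSetValuesBlocked(), MatAssemblyBegin(), MatAssemblyEnd() ... */
        MatDestroy(&A);
     .ve
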
2483    Level: intermediate
2484 
2485 .keywords: matrix, block, aij, compressed row, sparse, parallel
2486 
2487 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2488 @*/
2489 
2490 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2491 {
2492   PetscErrorCode ierr;
2493   PetscMPIInt    size;
2494 
2495   PetscFunctionBegin;
2496   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2497   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2498   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2499   if (size > 1) {
2500     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2501     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2502   } else {
2503     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2504     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2505   }
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 
2510 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2511 {
2512   Mat            mat;
2513   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2514   PetscErrorCode ierr;
2515   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2516   PetscScalar    *array;
2517 
2518   PetscFunctionBegin;
2519   *newmat = 0;
2520 
2521   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2522   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2523   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2524   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2525   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2526   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2527 
2528   mat->factortype   = matin->factortype;
2529   mat->preallocated = PETSC_TRUE;
2530   mat->assembled    = PETSC_TRUE;
2531   mat->insertmode   = NOT_SET_VALUES;
2532 
2533   a      = (Mat_MPISBAIJ*)mat->data;
2534   a->bs2 = oldmat->bs2;
2535   a->mbs = oldmat->mbs;
2536   a->nbs = oldmat->nbs;
2537   a->Mbs = oldmat->Mbs;
2538   a->Nbs = oldmat->Nbs;
2539 
2540 
2541   a->size         = oldmat->size;
2542   a->rank         = oldmat->rank;
2543   a->donotstash   = oldmat->donotstash;
2544   a->roworiented  = oldmat->roworiented;
2545   a->rowindices   = 0;
2546   a->rowvalues    = 0;
2547   a->getrowactive = PETSC_FALSE;
2548   a->barray       = 0;
2549   a->rstartbs     = oldmat->rstartbs;
2550   a->rendbs       = oldmat->rendbs;
2551   a->cstartbs     = oldmat->cstartbs;
2552   a->cendbs       = oldmat->cendbs;
2553 
2554   /* hash table stuff */
2555   a->ht           = 0;
2556   a->hd           = 0;
2557   a->ht_size      = 0;
2558   a->ht_flag      = oldmat->ht_flag;
2559   a->ht_fact      = oldmat->ht_fact;
2560   a->ht_total_ct  = 0;
2561   a->ht_insert_ct = 0;
2562 
2563   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2564   if (oldmat->colmap) {
2565 #if defined(PETSC_USE_CTABLE)
2566     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2567 #else
2568     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2569     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2570     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2571 #endif
2572   } else a->colmap = 0;
2573 
2574   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2575     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2576     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2577     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2578   } else a->garray = 0;
2579 
2580   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2581   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2582   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2583   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2584   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2585 
2586   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2587   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2588   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2589   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2590 
2591   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2592   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2593   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2594   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2595   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2596   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2597   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2598   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2601   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2602   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2603   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2604 
2605   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; instead take a reference to the old scatter: */
2606   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2607   a->sMvctx = oldmat->sMvctx;
2608   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2609 
2610   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2611   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2612   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2613   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2614   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2615   *newmat = mat;
2616   PetscFunctionReturn(0);
2617 }
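
/* Usage sketch (editor's addition, not in the original source): duplicating an
   assembled MPISBAIJ matrix through the public MatDuplicate() entry point, which
   dispatches to the routine that ends above. A is a placeholder name; the block
   is guarded with #if 0 so it is not compiled. */
#if 0
static PetscErrorCode ExampleDuplicate(Mat A)
{
  PetscErrorCode ierr;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); /* copies structure and values */
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif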
2618 
2619 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2620 {
2621   PetscErrorCode ierr;
2622   PetscInt       i,nz,j,rstart,rend;
2623   PetscScalar    *vals,*buf;
2624   MPI_Comm       comm;
2625   MPI_Status     status;
2626   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2627   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2628   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2629   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2630   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2631   PetscInt       dcount,kmax,k,nzcount,tmp;
2632   int            fd;
2633 
2634   PetscFunctionBegin;
2635   /* force binary viewer to load .info file if it has not yet done so */
2636   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2637   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2638   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2639   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2640   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2641   if (bs < 0) bs = 1;
2642 
2643   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2644   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2645   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2646   if (!rank) {
2647     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2648     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2649     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2650   }
2651 
2652   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2653   M    = header[1];
2654   N    = header[2];
2655 
2656   /* If global sizes are set, check if they are consistent with that given in the file */
2657   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2658   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2659 
2660   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2661 
2662   /*
2663      This code adds extra rows to make sure the number of rows is
2664      divisible by the blocksize
2665   */
2666   Mbs        = M/bs;
2667   extra_rows = bs - M + bs*(Mbs);
2668   if (extra_rows == bs) extra_rows = 0;
2669   else                  Mbs++;
2670   if (extra_rows && !rank) {
2671     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2672   }
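  /* Worked example of the padding above (editor's note): with M = 10 and bs = 4,
     Mbs = 10/4 = 2 and extra_rows = 4 - 10 + 4*2 = 2, so Mbs becomes 3 and the
     matrix is padded to 12 rows; the extra rows later receive unit diagonal entries. */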
2673 
2674   /* determine ownership of all rows */
2675   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2676     mbs = Mbs/size + ((Mbs % size) > rank);
2677     m   = mbs*bs;
2678   } else { /* User Set */
2679     m   = newmat->rmap->n;
2680     mbs = m/bs;
2681   }
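  /* Worked example of the PETSC_DECIDE branch (editor's note): with Mbs = 10 block
     rows and size = 4 processes, Mbs/size = 2 and Mbs % size = 2, so ranks 0 and 1
     get mbs = 3 block rows while ranks 2 and 3 get mbs = 2, summing to Mbs = 10. */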
2682   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2683   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2684   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2685   rowners[0] = 0;
2686   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2687   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2688   rstart = rowners[rank];
2689   rend   = rowners[rank+1];
2690 
2691   /* distribute row lengths to all processors */
2692   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2693   if (!rank) {
2694     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2695     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2696     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2697     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2698     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2699     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2700     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2701   } else {
2702     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2703   }
2704 
2705   if (!rank) {   /* procs[0] */
2706     /* calculate the number of nonzeros on each processor */
2707     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2708     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2709     for (i=0; i<size; i++) {
2710       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2711         procsnz[i] += rowlengths[j];
2712       }
2713     }
2714     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2715 
2716     /* determine max buffer needed and allocate it */
2717     maxnz = 0;
2718     for (i=0; i<size; i++) {
2719       maxnz = PetscMax(maxnz,procsnz[i]);
2720     }
2721     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2722 
2723     /* read in my part of the matrix column indices  */
2724     nz     = procsnz[0];
2725     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2726     mycols = ibuf;
2727     if (size == 1) nz -= extra_rows;
2728     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2729     if (size == 1) {
2730       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2731     }
2732 
2733     /* read in everyone else's part (except the last proc's) and ship it off */
2734     for (i=1; i<size-1; i++) {
2735       nz   = procsnz[i];
2736       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2737       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2738     }
2739     /* read in the data for the last proc */
2740     if (size != 1) {
2741       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2742       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2743       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2744       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2745     }
2746     ierr = PetscFree(cols);CHKERRQ(ierr);
2747   } else {  /* procs[i], i>0 */
2748     /* determine buffer space needed for message */
2749     nz = 0;
2750     for (i=0; i<m; i++) nz += locrowlens[i];
2751     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2752     mycols = ibuf;
2753     /* receive message of column indices */
2754     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2755     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2756     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2757   }
2758 
2759   /* loop over local rows, determining the number of off-diagonal entries */
2760   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2761   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2762   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2763   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2764   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2765   rowcount = 0;
2766   nzcount  = 0;
2767   for (i=0; i<mbs; i++) {
2768     dcount  = 0;
2769     odcount = 0;
2770     for (j=0; j<bs; j++) {
2771       kmax = locrowlens[rowcount];
2772       for (k=0; k<kmax; k++) {
2773         tmp = mycols[nzcount++]/bs; /* block col. index */
2774         if (!mask[tmp]) {
2775           mask[tmp] = 1;
2776           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2777           else masked1[dcount++] = tmp; /* entry in diag portion */
2778         }
2779       }
2780       rowcount++;
2781     }
2782 
2783     dlens[i]  = dcount;  /* d_nzz[i] */
2784     odlens[i] = odcount; /* o_nzz[i] */
2785 
2786     /* zero out the mask elements we set */
2787     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2788     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2789   }
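  /* Example of the counting above (editor's note): with bs = 1 and local block rows
     [rstart,rend) = [2,4), a row with entries in block columns {1,2,5} produces
     dlens[i] = 1 (column 2 lies in the diagonal block) and odlens[i] = 2 (columns 1
     and 5 are off-diagonal); mask[] prevents counting a block column twice per block row. */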
2790   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2791   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2792   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2793 
2794   if (!rank) {
2795     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2796     /* read in my part of the matrix numerical values  */
2797     nz     = procsnz[0];
2798     vals   = buf;
2799     mycols = ibuf;
2800     if (size == 1) nz -= extra_rows;
2801     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2802     if (size == 1) {
2803       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2804     }
2805 
2806     /* insert into matrix */
2807     jj = rstart*bs;
2808     for (i=0; i<m; i++) {
2809       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2810       mycols += locrowlens[i];
2811       vals   += locrowlens[i];
2812       jj++;
2813     }
2814 
2815     /* read in the other processors' parts (except the last one) and ship them out */
2816     for (i=1; i<size-1; i++) {
2817       nz   = procsnz[i];
2818       vals = buf;
2819       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2820       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2821     }
2822     /* the last proc */
2823     if (size != 1) {
2824       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2825       vals = buf;
2826       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2827       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2828       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2829     }
2830     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2831 
2832   } else {
2833     /* receive numeric values */
2834     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2835 
2836     /* receive message of values */
2837     vals   = buf;
2838     mycols = ibuf;
2839     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2840     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2841     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2842 
2843     /* insert into matrix */
2844     jj = rstart*bs;
2845     for (i=0; i<m; i++) {
2846       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2847       mycols += locrowlens[i];
2848       vals   += locrowlens[i];
2849       jj++;
2850     }
2851   }
2852 
2853   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2854   ierr = PetscFree(buf);CHKERRQ(ierr);
2855   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2856   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2857   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2858   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2859   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2860   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2861   PetscFunctionReturn(0);
2862 }
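
/* Usage sketch (editor's addition): loading an MPISBAIJ matrix from a PETSc binary
   file via the public MatLoad() interface, which dispatches to the routine above.
   The file name "matrix.dat" is a placeholder; guarded out of compilation. */
#if 0
static PetscErrorCode ExampleLoadSBAIJ(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);              /* calls MatLoad_MPISBAIJ() */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif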
2863 
2864 /*XXXXX@
2865    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2866 
2867    Not Collective on Mat, each process can have a different hash factor
2868 
2869    Input Parameters:
2870 +  mat  - the matrix
2871 -  fact - factor
2872 
2873    Level: advanced
2874 
2875   Notes:
2876    This can also be set by the command line option: -mat_use_hash_table fact
2877 
2878 .keywords: matrix, hashtable, factor, HT
2879 
2880 .seealso: MatSetOption()
2881 @XXXXX*/
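
/* Usage sketch (editor's addition): per the Notes in the (disabled) man page above,
   the hash table is enabled from the command line, e.g.

       mpiexec -n 4 ./app -mat_type mpisbaij -mat_use_hash_table 0.6

   where 0.6 is an illustrative factor. The feature can also be toggled
   programmatically with MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE). */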
2882 
2883 
2884 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2885 {
2886   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2887   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2888   PetscReal      atmp;
2889   PetscReal      *work,*svalues,*rvalues;
2890   PetscErrorCode ierr;
2891   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2892   PetscMPIInt    rank,size;
2893   PetscInt       *rowners_bs,dest,count,source;
2894   PetscScalar    *va;
2895   MatScalar      *ba;
2896   MPI_Status     stat;
2897 
2898   PetscFunctionBegin;
2899   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2900   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2901   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2902 
2903   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2904   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2905 
2906   bs  = A->rmap->bs;
2907   mbs = a->mbs;
2908   Mbs = a->Mbs;
2909   ba  = b->a;
2910   bi  = b->i;
2911   bj  = b->j;
2912 
2913   /* find ownerships */
2914   rowners_bs = A->rmap->range;
2915 
2916   /* each proc creates an array to be distributed */
2917   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2918   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2919 
2920   /* row_max for B */
2921   if (rank != size-1) {
2922     for (i=0; i<mbs; i++) {
2923       ncols = bi[1] - bi[0]; bi++;
2924       brow  = bs*i;
2925       for (j=0; j<ncols; j++) {
2926         bcol = bs*(*bj);
2927         for (kcol=0; kcol<bs; kcol++) {
2928           col  = bcol + kcol;                /* local col index */
2929           col += rowners_bs[rank+1];      /* global col index */
2930           for (krow=0; krow<bs; krow++) {
2931             atmp = PetscAbsScalar(*ba); ba++;
2932             row  = brow + krow;   /* local row index */
2933             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2934             if (work[col] < atmp) work[col] = atmp;
2935           }
2936         }
2937         bj++;
2938       }
2939     }
2940 
2941     /* send values to their owners */
2942     for (dest=rank+1; dest<size; dest++) {
2943       svalues = work + rowners_bs[dest];
2944       count   = rowners_bs[dest+1]-rowners_bs[dest];
2945       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2946     }
2947   }
2948 
2949   /* receive values */
2950   if (rank) {
2951     rvalues = work;
2952     count   = rowners_bs[rank+1]-rowners_bs[rank];
2953     for (source=0; source<rank; source++) {
2954       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2955       /* process values */
2956       for (i=0; i<count; i++) {
2957         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2958       }
2959     }
2960   }
2961 
2962   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2963   ierr = PetscFree(work);CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
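
/* Usage sketch (editor's addition): computing per-row maximum absolute values with
   the public MatGetRowMaxAbs() interface; note the routine above requires idx to be
   NULL for this matrix type. Guarded out of compilation. */
#if 0
static PetscErrorCode ExampleRowMaxAbs(Mat A)
{
  PetscErrorCode ierr;
  Vec            v;

  PetscFunctionBegin;
  ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);     /* v matches the row layout of A */
  ierr = MatGetRowMaxAbs(A,v,NULL);CHKERRQ(ierr);    /* idx must be NULL for MPISBAIJ */
  ierr = VecDestroy(&v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif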
2966 
2967 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2968 {
2969   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2970   PetscErrorCode    ierr;
2971   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2972   PetscScalar       *x,*ptr,*from;
2973   Vec               bb1;
2974   const PetscScalar *b;
2975 
2976   PetscFunctionBegin;
2977   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2978   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2979 
2980   if (flag == SOR_APPLY_UPPER) {
2981     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2982     PetscFunctionReturn(0);
2983   }
2984 
2985   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2986     if (flag & SOR_ZERO_INITIAL_GUESS) {
2987       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2988       its--;
2989     }
2990 
2991     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2992     while (its--) {
2993 
2994       /* lower triangular part: slvec0b = - B^T*xx */
2995       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2996 
2997       /* copy xx into slvec0a */
2998       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2999       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3000       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3001       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3002 
3003       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3004 
3005       /* copy bb into slvec1a */
3006       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3007       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3008       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3009       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3010 
3011       /* set slvec1b = 0 */
3012       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3013 
3014       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3015       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3016       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3017       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3018 
3019       /* upper triangular part: bb1 = bb1 - B*x */
3020       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3021 
3022       /* local diagonal sweep */
3023       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3024     }
3025     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3026   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3027     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3028   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3029     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3030   } else if (flag & SOR_EISENSTAT) {
3031     Vec               xx1;
3032     PetscBool         hasop;
3033     const PetscScalar *diag;
3034     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3035     PetscInt          i,n;
3036 
3037     if (!mat->xx1) {
3038       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3039       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3040     }
3041     xx1 = mat->xx1;
3042     bb1 = mat->bb1;
3043 
3044     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3045 
3046     if (!mat->diag) {
3047       /* this is wrong for same matrix with new nonzero values */
3048       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3049       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3050     }
3051     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3052 
3053     if (hasop) {
3054       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
3055       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3056     } else {
3057       /*
3058           These two lines are replaced by code that may be a bit faster for a good compiler
3059       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3060       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3061       */
3062       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3063       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3064       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3065       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3066       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3067       if (omega == 1.0) {
3068         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3069         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3070       } else {
3071         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3072         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3073       }
3074       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3075       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3076       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3077       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3078     }
3079 
3080     /* multiply off-diagonal portion of matrix */
3081     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3082     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3083     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3084     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3085     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3086     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3087     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3088     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3089     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3090     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3091 
3092     /* local sweep */
3093     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3094     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3095   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3096   PetscFunctionReturn(0);
3097 }
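
/* Usage sketch (editor's addition): invoking the routine above through the public
   MatSOR() interface; the block size must be 1 and the SOR type must be one of the
   variants handled above, e.g. a local symmetric sweep with a zero initial guess. */
#if 0
static PetscErrorCode ExampleSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* omega = 1.0, fshift = 0.0, one global and one local iteration */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif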
3098 
3099 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3100 {
3101   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3102   PetscErrorCode ierr;
3103   Vec            lvec1,bb1;
3104 
3105   PetscFunctionBegin;
3106   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3107   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3108 
3109   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3110     if (flag & SOR_ZERO_INITIAL_GUESS) {
3111       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3112       its--;
3113     }
3114 
3115     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3116     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3117     while (its--) {
3118       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3119 
3120       /* lower triangular part: bb1 = bb - B^T*xx */
3121       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3122       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3123 
3124       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3125       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3126       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3127 
3128       /* upper triangular part: bb1 = bb1 - B*x */
3129       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3130       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3131 
3132       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3133 
3134       /* diagonal sweep */
3135       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3136     }
3137     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3138     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3139   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3140   PetscFunctionReturn(0);
3141 }
3142 
3143 /*@
3144      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain
3145          the local rows in standard CSR format.
3146 
3147    Collective on MPI_Comm
3148 
3149    Input Parameters:
3150 +  comm - MPI communicator
3151 .  bs - the block size, only a block size of 1 is supported
3152 .  m - number of local rows (cannot be PETSC_DECIDE)
3153 .  n - number of local columns; this should be the same as the local size used in creating the
3154        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3155        calculated if N is given). For square matrices n is almost always m.
3156 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3157 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3158 .   i - row indices
3159 .   j - column indices
3160 -   a - matrix values
3161 
3162    Output Parameter:
3163 .   mat - the matrix
3164 
3165    Level: intermediate
3166 
3167    Notes:
3168        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3169      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3170      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3171 
3172        The i and j indices are 0-based, and the i array holds offsets into the local j array.
3173 
3174 .keywords: matrix, aij, compressed row, sparse, parallel
3175 
3176 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3177           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3178 @*/
3179 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3180 {
3181   PetscErrorCode ierr;
3182 
3183 
3184   PetscFunctionBegin;
3185   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3186   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3187   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3188   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3189   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3190   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3191   PetscFunctionReturn(0);
3192 }
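
/* Usage sketch (editor's addition): building a 3x3 symmetric tridiagonal matrix with
   the routine above on a single process. Only the upper triangle is supplied, which
   this editor assumes is what the SBAIJ storage expects; bs must be 1 as documented. */
#if 0
static PetscErrorCode ExampleCreateWithArrays(Mat *A)
{
  PetscErrorCode    ierr;
  const PetscInt    i[] = {0,2,4,5};                  /* row offsets, starts at 0   */
  const PetscInt    j[] = {0,1,1,2,2};                /* sorted column indices      */
  const PetscScalar a[] = {2.0,-1.0,2.0,-1.0,2.0};    /* upper-triangular values    */

  PetscFunctionBegin;
  ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_SELF,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif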
3193 
3194 
3195 /*@C
3196    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
3197    (the symmetric block compressed row format).
3198 
3199    Collective on MPI_Comm
3200 
3201    Input Parameters:
3202 +  B - the matrix
3203 .  bs - the block size
3204 .  i - the indices into j for the start of each local row (starts with zero)
3205 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
3206 -  v - optional values in the matrix
3207 
3208    Level: developer
3209 
3210 .keywords: matrix, aij, compressed row, sparse, parallel
3211 
3212 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3213 @*/
3214 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3215 {
3216   PetscErrorCode ierr;
3217 
3218   PetscFunctionBegin;
3219   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3220   PetscFunctionReturn(0);
3221 }
3222 
3223 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3224 {
3225   PetscErrorCode ierr;
3226   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3227   PetscInt       *indx;
3228   PetscScalar    *values;
3229 
3230   PetscFunctionBegin;
3231   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3232   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3233     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3234     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3235     PetscInt       *bindx,rmax=a->rmax,j;
3236 
3237     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3238     mbs = m/bs; Nbs = N/cbs;
3239     if (n == PETSC_DECIDE) {
3240       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3241     }
3242     /* Check sum(n) = Nbs */
3243     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3244     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);
3245 
3246     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3247     rstart -= mbs;
3248 
3249     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3250     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3251     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3252     for (i=0; i<mbs; i++) {
3253       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3254       nnz = nnz/bs;
3255       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3256       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3257       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3258     }
3259     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3260     ierr = PetscFree(bindx);CHKERRQ(ierr);
3261 
3262     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3263     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3264     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3265     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3266     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3267     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3268   }
3269 
3270   /* numeric phase */
3271   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3272   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3273 
3274   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3275   for (i=0; i<m; i++) {
3276     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3277     Ii   = i + rstart;
3278     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3279     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3280   }
3281   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3282   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3283   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3284   PetscFunctionReturn(0);
3285 }
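
/* Usage sketch (editor's addition): merging per-process sequential SBAIJ matrices
   into one parallel matrix through the public MatCreateMPIMatConcatenateSeqMat(),
   which dispatches to the routine above for this type. Aseq is a placeholder for an
   assembled MATSEQSBAIJ matrix held by each rank. */
#if 0
static PetscErrorCode ExampleConcatenate(MPI_Comm comm,Mat Aseq,Mat *Ampi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateMPIMatConcatenateSeqMat(comm,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,Ampi);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif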
3286