xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
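
/* Illustrative sketch (not part of this file, guarded out so it is never
   compiled): MatStoreValues()/MatRetrieveValues() snapshot and restore the
   numerical values of a matrix whose nonzero pattern is reused, e.g. across
   the iterations of a nonlinear solve; the pattern must be frozen first. */
#if 0
static PetscErrorCode ExampleStoreRetrieve(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* freeze the nonzero pattern so the stored values stay valid */
  ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(mat);CHKERRQ(ierr);    /* snapshot current values */
  /* ... modify the matrix values, e.g. add a diagonal shift ... */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* restore the snapshot    */
  PetscFunctionReturn(0);
}
#endif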
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
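
/* The ..._A_Private macro above and the ..._B_Private macro below share one
   scheme: a truncated binary search narrows the block-column window to at
   most three candidates, a short linear scan finishes the lookup, and on a
   miss the remainder of the row is shifted up by one block (bs2 values) to
   make room.  Within a bs-by-bs block, entry (ridx,cidx) is stored
   column-major at offset bs*cidx+ridx. */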
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored (see the guarded usage sketch after this function).
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some variables required by the macros above */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash) {
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs) {
189           if (a->ignore_ltriangular) {
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j];
200           else             value = v[i+j*m];
201           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
202           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
203         } else if (in[j] < 0) continue;
204 #if defined(PETSC_USE_DEBUG)
205         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
206 #endif
207         else {  /* off-diag entry (B) */
208           if (mat->was_assembled) {
209             if (!baij->colmap) {
210               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
211             }
212 #if defined (PETSC_USE_CTABLE)
213             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
214             col  = col - 1;
215 #else
216             col = baij->colmap[in[j]/bs] - 1;
217 #endif
218             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
219               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
220               col =  in[j];
221               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
222               B = baij->B;
223               b = (Mat_SeqBAIJ*)(B)->data;
224               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
225               ba=b->a;
226             } else col += in[j]%bs;
227           } else col = in[j];
228           if (roworiented) value = v[i*n+j];
229           else             value = v[i+j*m];
230           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233       }
234     } else {  /* off processor entry */
235       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
236       if (!baij->donotstash) {
237         mat->assembled = PETSC_FALSE;
238         n_loc = 0;
239         for (j=0; j<n; j++) {
240           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
241           in_loc[n_loc] = in[j];
242           if (roworiented) {
243             v_loc[n_loc] = v[i*n+j];
244           } else {
245             v_loc[n_loc] = v[j*m+i];
246           }
247           n_loc++;
248         }
249         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
250       }
251     }
252   }
253   PetscFunctionReturn(0);
254 }
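
/* A minimal usage sketch for the upper triangular convention enforced above
   (an assumed standalone example, guarded out so it is never compiled):
   only entries with block-row <= block-column may be passed in; lower
   triangular input is dropped or rejected depending on the option below. */
#if 0
static PetscErrorCode ExampleSetUpperTriangular(MPI_Comm comm)
{
  Mat            C;
  PetscErrorCode ierr;
  PetscInt       row = 0,col = 2; /* row <= col: an upper triangular entry */
  PetscScalar    v   = 1.0;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&C);CHKERRQ(ierr);
  ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,8,8);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(C,2,2,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
  /* silently drop any lower triangular input instead of raising an error */
  ierr = MatSetOption(C,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValues(C,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif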
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
259 {
260   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
261   const MatScalar *value;
262   MatScalar       *barray=baij->barray;
263   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
264   PetscErrorCode  ierr;
265   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
266   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
267   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
268 
269   PetscFunctionBegin;
270   if (!barray) {
271     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
272     baij->barray = barray;
273   }
274 
275   if (roworiented) {
276     stepval = (n-1)*bs;
277   } else {
278     stepval = (m-1)*bs;
279   }
280   for (i=0; i<m; i++) {
281     if (im[i] < 0) continue;
282 #if defined(PETSC_USE_DEBUG)
283     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
284 #endif
285     if (im[i] >= rstart && im[i] < rend) {
286       row = im[i] - rstart;
287       for (j=0; j<n; j++) {
288         if (im[i] > in[j]) {
289           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
290           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
291         }
292         /* If n == 1 (row-oriented) or m == 1 (column-oriented), no copy is required */
293         if ((roworiented) && (n == 1)) {
294           barray = (MatScalar*) v + i*bs2;
295         } else if ((!roworiented) && (m == 1)) {
296           barray = (MatScalar*) v + j*bs2;
297         } else { /* Here a copy is required */
298           if (roworiented) {
299             value = v + i*(stepval+bs)*bs + j*bs;
300           } else {
301             value = v + j*(stepval+bs)*bs + i*bs;
302           }
303           for (ii=0; ii<bs; ii++,value+=stepval) {
304             for (jj=0; jj<bs; jj++) {
305               *barray++  = *value++;
306             }
307           }
308           barray -=bs2;
309         }
310 
311         if (in[j] >= cstart && in[j] < cend) {
312           col  = in[j] - cstart;
313           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
314         }
315         else if (in[j] < 0) continue;
316 #if defined(PETSC_USE_DEBUG)
317         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
318 #endif
319         else {
320           if (mat->was_assembled) {
321             if (!baij->colmap) {
322               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
323             }
324 
325 #if defined(PETSC_USE_DEBUG)
326 #if defined (PETSC_USE_CTABLE)
327             { PetscInt data;
328               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
329               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330             }
331 #else
332             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
333 #endif
334 #endif
335 #if defined (PETSC_USE_CTABLE)
336             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
337             col  = (col - 1)/bs;
338 #else
339             col = (baij->colmap[in[j]] - 1)/bs;
340 #endif
341             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
342               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
343               col =  in[j];
344             }
345           }
346           else col = in[j];
347           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
348         }
349       }
350     } else {
351       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
352       if (!baij->donotstash) {
353         if (roworiented) {
354           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
355         } else {
356           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
357         }
358       }
359     }
360   }
361   PetscFunctionReturn(0);
362 }
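
/* Sketch of the blocked interface handled above (an assumed example, guarded
   out): a single bs-by-bs block is passed as a dense array of bs2 values;
   with the default row-oriented storage the block is laid out row by row. */
#if 0
static PetscErrorCode ExampleSetBlock(Mat C) /* C assumed MATMPISBAIJ with bs = 2 */
{
  PetscErrorCode ierr;
  PetscInt       brow = 0,bcol = 1;            /* block indices, brow <= bcol */
  PetscScalar    block[4] = {1.0,2.0,3.0,4.0}; /* one 2x2 block, row-oriented */

  PetscFunctionBegin;
  ierr = MatSetValuesBlocked(C,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif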
363 
364 #undef __FUNCT__
365 #define __FUNCT__ "MatGetValues_MPISBAIJ"
366 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
367 {
368   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
369   PetscErrorCode ierr;
370   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
371   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
372 
373   PetscFunctionBegin;
374   for (i=0; i<m; i++) {
375     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
376     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
377     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
378       row = idxm[i] - bsrstart;
379       for (j=0; j<n; j++) {
380         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
381         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
382         if (idxn[j] >= bscstart && idxn[j] < bscend) {
383           col = idxn[j] - bscstart;
384           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
385         } else {
386           if (!baij->colmap) {
387             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
388           }
389 #if defined (PETSC_USE_CTABLE)
390           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
391           data --;
392 #else
393           data = baij->colmap[idxn[j]/bs]-1;
394 #endif
395           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
396           else {
397             col  = data + idxn[j]%bs;
398             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
399           }
400         }
401       }
402     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
403   }
404   PetscFunctionReturn(0);
405 }
406 
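/* Notes on the norms computed below (a reading of the code, for reference):
   for NORM_FROBENIUS each process contributes the squared norm of its
   diagonal block A and of its strictly upper off-diagonal part B; since by
   symmetry every entry of B also occurs below the diagonal on some process,
   the global value is sqrt(sum||A||_F^2 + 2*sum||B||_F^2).  For NORM_1 and
   NORM_INFINITY (identical for a symmetric matrix) each stored entry is
   added to both its row sum and its column sum before the global reduce. */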
407 #undef __FUNCT__
408 #define __FUNCT__ "MatNorm_MPISBAIJ"
409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
410 {
411   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscReal      sum[2],*lnorm2;
414 
415   PetscFunctionBegin;
416   if (baij->size == 1) {
417     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
418   } else {
419     if (type == NORM_FROBENIUS) {
420       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
421       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
422       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
423       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
424       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
425       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
426       *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
427       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
428     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
429       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
430       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
431       PetscReal    *rsum,*rsum2,vabs;
432       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
433       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
434       MatScalar    *v;
435 
436       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
437       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
438       /* Amat */
439       v = amat->a; jj = amat->j;
440       for (brow=0; brow<mbs; brow++) {
441         grow = bs*(rstart + brow);
442         nz = amat->i[brow+1] - amat->i[brow];
443         for (bcol=0; bcol<nz; bcol++) {
444           gcol = bs*(rstart + *jj); jj++;
445           for (col=0; col<bs; col++) {
446             for (row=0; row<bs; row++) {
447               vabs = PetscAbsScalar(*v); v++;
448               rsum[gcol+col] += vabs;
449               /* non-diagonal block */
450               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
451             }
452           }
453         }
454       }
455       /* Bmat */
456       v = bmat->a; jj = bmat->j;
457       for (brow=0; brow<mbs; brow++) {
458         grow = bs*(rstart + brow);
459         nz = bmat->i[brow+1] - bmat->i[brow];
460         for (bcol=0; bcol<nz; bcol++) {
461           gcol = bs*garray[*jj]; jj++;
462           for (col=0; col<bs; col++) {
463             for (row=0; row<bs; row++) {
464               vabs = PetscAbsScalar(*v); v++;
465               rsum[gcol+col] += vabs;
466               rsum[grow+row] += vabs;
467             }
468           }
469         }
470       }
471       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
472       *norm = 0.0;
473       for (col=0; col<mat->cmap->N; col++) {
474         if (rsum2[col] > *norm) *norm = rsum2[col];
475       }
476       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
477     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
478   }
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
484 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
485 {
486   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
487   PetscErrorCode ierr;
488   PetscInt       nstash,reallocs;
489   InsertMode     addv;
490 
491   PetscFunctionBegin;
492   if (baij->donotstash || mat->nooffprocentries) {
493     PetscFunctionReturn(0);
494   }
495 
496   /* make sure all processors are either in INSERTMODE or ADDMODE */
497   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
498   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
499   mat->insertmode = addv; /* in case this processor had no cache */
500 
501   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
502   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
503   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
504   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
513 {
514   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
515   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
516   PetscErrorCode ierr;
517   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
518   PetscInt       *row,*col;
519   PetscBool      other_disassembled;
520   PetscMPIInt    n;
521   PetscBool      r1,r2,r3;
522   MatScalar      *val;
523   InsertMode     addv = mat->insertmode;
524 
525   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
526   PetscFunctionBegin;
527   if (!baij->donotstash &&  !mat->nooffprocentries) {
528     while (1) {
529       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
530       if (!flg) break;
531 
532       for (i=0; i<n;) {
533         /* Now identify the consecutive vals belonging to the same row */
534         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
535         if (j < n) ncols = j-i;
536         else       ncols = n-i;
537         /* Now assemble all these values with a single function call */
538         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
539         i = j;
540       }
541     }
542     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
543     /* Now process the block-stash. Since the values are stashed column-oriented,
544        clear the roworiented flags before calling MatSetValues(), then
545        restore the original flags afterwards */
546     r1 = baij->roworiented;
547     r2 = a->roworiented;
548     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
549     baij->roworiented = PETSC_FALSE;
550     a->roworiented    = PETSC_FALSE;
551     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
552     while (1) {
553       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
554       if (!flg) break;
555 
556       for (i=0; i<n;) {
557         /* Now identify the consecutive vals belonging to the same row */
558         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
559         if (j < n) ncols = j-i;
560         else       ncols = n-i;
561         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
562         i = j;
563       }
564     }
565     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
566     baij->roworiented = r1;
567     a->roworiented    = r2;
568     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
569   }
570 
571   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
572   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
573 
574   /* determine if any processor has disassembled; if so we must
575      also disassemble ourselves, in order that we may reassemble. */
576   /*
577      if nonzero structure of submatrix B cannot change then we know that
578      no processor disassembled thus we can skip this stuff
579   */
580   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
581     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
582     if (mat->was_assembled && !other_disassembled) {
583       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
584     }
585   }
586 
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
589   }
590   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
591   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
592   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
593 
594   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
595   baij->rowvalues = 0;
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
600 #undef __FUNCT__
601 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
602 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
603 {
604   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
605   PetscErrorCode    ierr;
606   PetscInt          bs = mat->rmap->bs;
607   PetscMPIInt       size = baij->size,rank = baij->rank;
608   PetscBool         iascii,isdraw;
609   PetscViewer       sviewer;
610   PetscViewerFormat format;
611 
612   PetscFunctionBegin;
613   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
614   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
615   if (iascii) {
616     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
617     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
618       MatInfo info;
619       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
620       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
621       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
622       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
623       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
624       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
625       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
627       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
629       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
630       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
631       PetscFunctionReturn(0);
632     } else if (format == PETSC_VIEWER_ASCII_INFO) {
633       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
634       PetscFunctionReturn(0);
635     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
636       PetscFunctionReturn(0);
637     }
638   }
639 
640   if (isdraw) {
641     PetscDraw  draw;
642     PetscBool  isnull;
643     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
644     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
645   }
646 
647   if (size == 1) {
648     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
649     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
650   } else {
651     /* assemble the entire matrix onto first processor. */
652     Mat          A;
653     Mat_SeqSBAIJ *Aloc;
654     Mat_SeqBAIJ  *Bloc;
655     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
656     MatScalar    *a;
657 
658     /* Should this be the same type as mat? */
659     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
660     if (!rank) {
661       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
662     } else {
663       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
664     }
665     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
666     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
667     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
668     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
669 
670     /* copy over the A part */
671     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
672     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
673     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
674 
675     for (i=0; i<mbs; i++) {
676       rvals[0] = bs*(baij->rstartbs + i);
677       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
678       for (j=ai[i]; j<ai[i+1]; j++) {
679         col = (baij->cstartbs+aj[j])*bs;
680         for (k=0; k<bs; k++) {
681           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
682           col++; a += bs;
683         }
684       }
685     }
686     /* copy over the B part */
687     Bloc = (Mat_SeqBAIJ*)baij->B->data;
688     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
689     for (i=0; i<mbs; i++) {
690 
691       rvals[0] = bs*(baij->rstartbs + i);
692       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
693       for (j=ai[i]; j<ai[i+1]; j++) {
694         col = baij->garray[aj[j]]*bs;
695         for (k=0; k<bs; k++) {
696           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
697           col++; a += bs;
698         }
699       }
700     }
701     ierr = PetscFree(rvals);CHKERRQ(ierr);
702     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
703     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
704     /*
705        Every process must make the call to draw the matrix since the graphics waits are
706        synchronized across all processors that share the PetscDraw object
707     */
708     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
709     if (!rank) {
710       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
711       /* Set the type name to MATMPISBAIJ so that the correct type is printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII() */
712       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
713       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
714     }
715     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
716     ierr = MatDestroy(&A);CHKERRQ(ierr);
717   }
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNCT__
722 #define __FUNCT__ "MatView_MPISBAIJ"
723 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
724 {
725   PetscErrorCode ierr;
726   PetscBool      iascii,isdraw,issocket,isbinary;
727 
728   PetscFunctionBegin;
729   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
730   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
731   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
732   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
733   if (iascii || isdraw || issocket || isbinary) {
734     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatDestroy_MPISBAIJ"
741 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
742 {
743   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
744   PetscErrorCode ierr;
745 
746   PetscFunctionBegin;
747 #if defined(PETSC_USE_LOG)
748   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
749 #endif
750   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
751   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
752   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
753   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
754 #if defined (PETSC_USE_CTABLE)
755   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
756 #else
757   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
758 #endif
759   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
760   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
761   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
762   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
763   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
764   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
765   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
767   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
768   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
769   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
770   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
774 #if defined(PETSC_USE_REAL_MAT_SINGLE)
775   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
776 #endif
777   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
778   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
779   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
780   ierr = PetscFree(mat->data);CHKERRQ(ierr);
781 
782   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
783   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
784   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
785   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
786   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
787   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
788   PetscFunctionReturn(0);
789 }
790 
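/* The symmetric multiplies below use a single forward scatter instead of a
   forward plus a reverse one.  Two work vectors are packed as
       slvec0 = [ x ; B^T x ]   and   slvec1 = [ A x ; 0 ],
   and one scatter with ADD_VALUES moves ghost values of x into slvec1b while
   accumulating the remote B^T x contributions into slvec1a; the product is
   then finished as y = B*slvec1b + slvec1a. */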
791 #undef __FUNCT__
792 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
793 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
794 {
795   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
796   PetscErrorCode ierr;
797   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
798   PetscScalar    *x,*from;
799 
800   PetscFunctionBegin;
801   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
802   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
803 
804   /* diagonal part */
805   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
806   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
807 
808   /* subdiagonal part */
809   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
810 
811   /* copy x into the vec slvec0 */
812   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
813   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
814 
815   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
816   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
817   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
818 
819   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821   /* superdiagonal part */
822   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
823   PetscFunctionReturn(0);
824 }
825 
826 #undef __FUNCT__
827 #define __FUNCT__ "MatMult_MPISBAIJ"
828 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
829 {
830   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
831   PetscErrorCode ierr;
832   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
833   PetscScalar    *x,*from;
834 
835   PetscFunctionBegin;
836   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
837   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
838 
839   /* diagonal part */
840   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
841   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
842 
843   /* subdiagonal part */
844   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
845 
846   /* copy x into the vec slvec0 */
847   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
848   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
849 
850   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
851   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
852   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
853 
854   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
856   /* superdiagonal part */
857   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
863 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
864 {
865   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
866   PetscErrorCode ierr;
867   PetscInt       nt;
868 
869   PetscFunctionBegin;
870   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
871   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
872 
873   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
874   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
875 
876   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877   /* do diagonal part */
878   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
879   /* do superdiagonal part */
880   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
882   /* do subdiagonal part */
883   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
884   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
885   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
889 #undef __FUNCT__
890 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
891 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
892 {
893   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
894   PetscErrorCode ierr;
895   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
896   PetscScalar    *x,*from,zero=0.0;
897 
898   PetscFunctionBegin;
899   /*
900   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
901   PetscSynchronizedFlush(((PetscObject)A)->comm);
902   */
903   /* diagonal part */
904   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
905   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
906 
907   /* subdiagonal part */
908   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
909 
910   /* copy x into the vec slvec0 */
911   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
912   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
913   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
914   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
915 
916   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
918   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919 
920   /* superdiagonal part */
921   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
927 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
928 {
929   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
930   PetscErrorCode ierr;
931 
932   PetscFunctionBegin;
933   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
934   /* do diagonal part */
935   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
936   /* do superdiagonal part */
937   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
938   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
939 
940   /* do subdiagonal part */
941   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
942   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
943   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 /*
948   This only works correctly for square matrices where the subblock A->A is the
949    diagonal block
950 */
951 #undef __FUNCT__
952 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
953 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
954 {
955   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
956   PetscErrorCode ierr;
957 
958   PetscFunctionBegin;
959   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
960   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatScale_MPISBAIJ"
966 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
967 {
968   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
973   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatGetRow_MPISBAIJ"
979 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
980 {
981   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
982   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
983   PetscErrorCode ierr;
984   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
985   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
986   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
987 
988   PetscFunctionBegin;
989   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
990   mat->getrowactive = PETSC_TRUE;
991 
992   if (!mat->rowvalues && (idx || v)) {
993     /*
994         allocate enough space to hold information from the longest row.
995     */
996     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
997     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
998     PetscInt     max = 1,mbs = mat->mbs,tmp;
999     for (i=0; i<mbs; i++) {
1000       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1001       if (max < tmp) { max = tmp; }
1002     }
1003     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1004   }
1005 
1006   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1007   lrow = row - brstart;  /* local row index */
1008 
1009   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1010   if (!v)   {pvA = 0; pvB = 0;}
1011   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1012   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1013   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1014   nztot = nzA + nzB;
1015 
1016   cmap  = mat->garray;
1017   if (v  || idx) {
1018     if (nztot) {
1019       /* Sort by increasing column numbers, assuming A and B already sorted */
1020       PetscInt imark = -1;
1021       if (v) {
1022         *v = v_p = mat->rowvalues;
1023         for (i=0; i<nzB; i++) {
1024           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1025           else break;
1026         }
1027         imark = i;
1028         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1029         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1030       }
1031       if (idx) {
1032         *idx = idx_p = mat->rowindices;
1033         if (imark > -1) {
1034           for (i=0; i<imark; i++) {
1035             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1036           }
1037         } else {
1038           for (i=0; i<nzB; i++) {
1039             if (cmap[cworkB[i]/bs] < cstart)
1040               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1041             else break;
1042           }
1043           imark = i;
1044         }
1045         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1046         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1047       }
1048     } else {
1049       if (idx) *idx = 0;
1050       if (v)   *v   = 0;
1051     }
1052   }
1053   *nz = nztot;
1054   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1055   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 #undef __FUNCT__
1060 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1061 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1062 {
1063   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1064 
1065   PetscFunctionBegin;
1066   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1067   baij->getrowactive = PETSC_FALSE;
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1073 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1074 {
1075   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1076   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1077 
1078   PetscFunctionBegin;
1079   aA->getrow_utriangular = PETSC_TRUE;
1080   PetscFunctionReturn(0);
1081 }
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1084 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1085 {
1086   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1087   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1088 
1089   PetscFunctionBegin;
1090   aA->getrow_utriangular = PETSC_FALSE;
1091   PetscFunctionReturn(0);
1092 }
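
/* Usage sketch (an assumed example, guarded out): row traversal of an SBAIJ
   matrix must be bracketed by MatGetRowUpperTriangular() and
   MatRestoreRowUpperTriangular(), which toggle the flag handled above. */
#if 0
static PetscErrorCode ExampleTraverseRows(Mat A,PetscInt rstart,PetscInt rend)
{
  PetscErrorCode    ierr;
  PetscInt          row,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect the upper triangular part of this local row ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif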
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1096 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1097 {
1098   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1103   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1109 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1110 {
1111   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1116   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1122 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1123 {
1124   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1129   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1135 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1136 {
1137   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1138   Mat            A = a->A,B = a->B;
1139   PetscErrorCode ierr;
1140   PetscReal      isend[5],irecv[5];
1141 
1142   PetscFunctionBegin;
1143   info->block_size     = (PetscReal)matin->rmap->bs;
1144   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1145   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1146   isend[3] = info->memory;  isend[4] = info->mallocs;
1147   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1148   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1149   isend[3] += info->memory;  isend[4] += info->mallocs;
1150   if (flag == MAT_LOCAL) {
1151     info->nz_used      = isend[0];
1152     info->nz_allocated = isend[1];
1153     info->nz_unneeded  = isend[2];
1154     info->memory       = isend[3];
1155     info->mallocs      = isend[4];
1156   } else if (flag == MAT_GLOBAL_MAX) {
1157     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1158     info->nz_used      = irecv[0];
1159     info->nz_allocated = irecv[1];
1160     info->nz_unneeded  = irecv[2];
1161     info->memory       = irecv[3];
1162     info->mallocs      = irecv[4];
1163   } else if (flag == MAT_GLOBAL_SUM) {
1164     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1165     info->nz_used      = irecv[0];
1166     info->nz_allocated = irecv[1];
1167     info->nz_unneeded  = irecv[2];
1168     info->memory       = irecv[3];
1169     info->mallocs      = irecv[4];
1170   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1171   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1172   info->fill_ratio_needed = 0;
1173   info->factor_mallocs    = 0;
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1179 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1180 {
1181   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1182   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1183   PetscErrorCode ierr;
1184 
1185   PetscFunctionBegin;
1186   switch (op) {
1187   case MAT_NEW_NONZERO_LOCATIONS:
1188   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1189   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1190   case MAT_KEEP_NONZERO_PATTERN:
1191   case MAT_NEW_NONZERO_LOCATION_ERR:
1192     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1193     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1194     break;
1195   case MAT_ROW_ORIENTED:
1196     a->roworiented = flg;
1197     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1198     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1199     break;
1200   case MAT_NEW_DIAGONALS:
1201     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1202     break;
1203   case MAT_IGNORE_OFF_PROC_ENTRIES:
1204     a->donotstash = flg;
1205     break;
1206   case MAT_USE_HASH_TABLE:
1207     a->ht_flag = flg;
1208     break;
1209   case MAT_HERMITIAN:
1210     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1211     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1212     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1213     break;
1214   case MAT_SPD:
1215     A->spd_set                         = PETSC_TRUE;
1216     A->spd                             = flg;
1217     if (flg) {
1218       A->symmetric                     = PETSC_TRUE;
1219       A->structurally_symmetric        = PETSC_TRUE;
1220       A->symmetric_set                 = PETSC_TRUE;
1221       A->structurally_symmetric_set    = PETSC_TRUE;
1222     }
1223     break;
1224   case MAT_SYMMETRIC:
1225     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1226     break;
1227   case MAT_STRUCTURALLY_SYMMETRIC:
1228     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1229     break;
1230   case MAT_SYMMETRY_ETERNAL:
1231     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1232     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1233     break;
1234   case MAT_IGNORE_LOWER_TRIANGULAR:
1235     aA->ignore_ltriangular = flg;
1236     break;
1237   case MAT_ERROR_LOWER_TRIANGULAR:
1238     aA->ignore_ltriangular = flg;
1239     break;
1240   case MAT_GETROW_UPPERTRIANGULAR:
1241     aA->getrow_utriangular = flg;
1242     break;
1243   default:
1244     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1245   }
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1251 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1252 {
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1257     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
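/* For the symmetric format only the symmetric scaling D*A*D makes sense, so
   the routine below insists that the left and right scaling vectors be
   identical; the off-diagonal part B is scaled by D on the left and by the
   scattered ghost values of D on the right. */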
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1264 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1265 {
1266   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1267   Mat            a=baij->A, b=baij->B;
1268   PetscErrorCode ierr;
1269   PetscInt       nv,m,n;
1270   PetscBool      flg;
1271 
1272   PetscFunctionBegin;
1273   if (ll != rr) {
1274     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1275     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1276   }
1277   if (!ll) PetscFunctionReturn(0);
1278 
1279   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1280   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1281 
1282   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1283   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1284 
1285   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1286 
1287   /* left diagonalscale the off-diagonal part */
1288   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1289 
1290   /* scale the diagonal part */
1291   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1292 
1293   /* right diagonalscale the off-diagonal part */
1294   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1295   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1301 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1302 {
1303   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1304   PetscErrorCode ierr;
1305 
1306   PetscFunctionBegin;
1307   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatEqual_MPISBAIJ"
1315 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1316 {
1317   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1318   Mat            a,b,c,d;
1319   PetscBool      flg;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   a = matA->A; b = matA->B;
1324   c = matB->A; d = matB->B;
1325 
1326   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1327   if (flg) {
1328     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1329   }
1330   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatCopy_MPISBAIJ"
1336 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1337 {
1338   PetscErrorCode ierr;
1339   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1340   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1341 
1342   PetscFunctionBegin;
1343   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1344   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1345     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1346     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1347     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1348   } else {
1349     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1350     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1351   }
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1357 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1358 {
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1368 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1369 {
1370   PetscErrorCode ierr;
1371   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1372   PetscBLASInt   bnz,one=1;
1373   Mat_SeqSBAIJ   *xa,*ya;
1374   Mat_SeqBAIJ    *xb,*yb;
1375 
1376   PetscFunctionBegin;
1377   if (str == SAME_NONZERO_PATTERN) {
1378     PetscScalar alpha = a;
1379     xa   = (Mat_SeqSBAIJ *)xx->A->data;
1380     ya   = (Mat_SeqSBAIJ *)yy->A->data;
1381     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1382     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1383     xb   = (Mat_SeqBAIJ *)xx->B->data;
1384     yb   = (Mat_SeqBAIJ *)yy->B->data;
1385     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1386     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1387   } else {
1388     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1389     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1390     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1391   }
1392   PetscFunctionReturn(0);
1393 }
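/*
   A usage sketch for the routine above (Y and X are assumed to be assembled
   MPISBAIJ matrices with identical nonzero patterns, which enables the fast
   BLASaxpy_() path instead of the MatAXPY_Basic() fallback):

       ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/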
1394 
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1397 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1398 {
1399   PetscErrorCode ierr;
1400   PetscInt       i;
1401   PetscBool      flg;
1402 
1403   PetscFunctionBegin;
1404   for (i=0; i<n; i++) {
1405     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1406     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1407   }
1408   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
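/*
   A usage sketch (the index set isrow is an assumed caller-provided IS): since
   only symmetric submatrices can be extracted from an SBAIJ matrix, the same
   index set must be passed for both rows and columns,

       Mat *submat;
       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
*/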
1411 
1412 
1413 /* -------------------------------------------------------------------*/
1414 static struct _MatOps MatOps_Values = {
1415         MatSetValues_MPISBAIJ,
1416         MatGetRow_MPISBAIJ,
1417         MatRestoreRow_MPISBAIJ,
1418         MatMult_MPISBAIJ,
1419 /*  4*/ MatMultAdd_MPISBAIJ,
1420         MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose versions */
1421         MatMultAdd_MPISBAIJ,
1422         0,
1423         0,
1424         0,
1425 /* 10*/ 0,
1426         0,
1427         0,
1428         MatSOR_MPISBAIJ,
1429         MatTranspose_MPISBAIJ,
1430 /* 15*/ MatGetInfo_MPISBAIJ,
1431         MatEqual_MPISBAIJ,
1432         MatGetDiagonal_MPISBAIJ,
1433         MatDiagonalScale_MPISBAIJ,
1434         MatNorm_MPISBAIJ,
1435 /* 20*/ MatAssemblyBegin_MPISBAIJ,
1436         MatAssemblyEnd_MPISBAIJ,
1437         MatSetOption_MPISBAIJ,
1438         MatZeroEntries_MPISBAIJ,
1439 /* 24*/ 0,
1440         0,
1441         0,
1442         0,
1443         0,
1444 /* 29*/ MatSetUp_MPISBAIJ,
1445         0,
1446         0,
1447         0,
1448         0,
1449 /* 34*/ MatDuplicate_MPISBAIJ,
1450         0,
1451         0,
1452         0,
1453         0,
1454 /* 39*/ MatAXPY_MPISBAIJ,
1455         MatGetSubMatrices_MPISBAIJ,
1456         MatIncreaseOverlap_MPISBAIJ,
1457         MatGetValues_MPISBAIJ,
1458         MatCopy_MPISBAIJ,
1459 /* 44*/ 0,
1460         MatScale_MPISBAIJ,
1461         0,
1462         0,
1463         0,
1464 /* 49*/ 0,
1465         0,
1466         0,
1467         0,
1468         0,
1469 /* 54*/ 0,
1470         0,
1471         MatSetUnfactored_MPISBAIJ,
1472         0,
1473         MatSetValuesBlocked_MPISBAIJ,
1474 /* 59*/ 0,
1475         0,
1476         0,
1477         0,
1478         0,
1479 /* 64*/ 0,
1480         0,
1481         0,
1482         0,
1483         0,
1484 /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1485         0,
1486         0,
1487         0,
1488         0,
1489 /* 74*/ 0,
1490         0,
1491         0,
1492         0,
1493         0,
1494 /* 79*/ 0,
1495         0,
1496         0,
1497         0,
1498         MatLoad_MPISBAIJ,
1499 /* 84*/ 0,
1500         0,
1501         0,
1502         0,
1503         0,
1504 /* 89*/ 0,
1505         0,
1506         0,
1507         0,
1508         0,
1509 /* 94*/ 0,
1510         0,
1511         0,
1512         0,
1513         0,
1514 /* 99*/ 0,
1515         0,
1516         0,
1517         0,
1518         0,
1519 /*104*/ 0,
1520         MatRealPart_MPISBAIJ,
1521         MatImaginaryPart_MPISBAIJ,
1522         MatGetRowUpperTriangular_MPISBAIJ,
1523         MatRestoreRowUpperTriangular_MPISBAIJ,
1524 /*109*/ 0,
1525         0,
1526         0,
1527         0,
1528         0,
1529 /*114*/ 0,
1530         0,
1531         0,
1532         0,
1533         0,
1534 /*119*/ 0,
1535         0,
1536         0,
1537         0
1538 };
1539 
1540 
1541 EXTERN_C_BEGIN
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1544 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1545 {
1546   PetscFunctionBegin;
1547   *a = ((Mat_MPISBAIJ *)A->data)->A;
1548   PetscFunctionReturn(0);
1549 }
1550 EXTERN_C_END
1551 
1552 EXTERN_C_BEGIN
1553 #undef __FUNCT__
1554 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1555 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1556 {
1557   Mat_MPISBAIJ   *b;
1558   PetscErrorCode ierr;
1559   PetscInt       i,mbs,Mbs;
1560 
1561   PetscFunctionBegin;
1562   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1563   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1564   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1565   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1566 
1567   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1568   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1569   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1570   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1571   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1572 
1573   if (d_nnz) {
1574     for (i=0; i<B->rmap->n/bs; i++) {
1575       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1576     }
1577   }
1578   if (o_nnz) {
1579     for (i=0; i<B->rmap->n/bs; i++) {
1580       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1581     }
1582   }
1583 
1584   b   = (Mat_MPISBAIJ*)B->data;
1585   mbs = B->rmap->n/bs;
1586   Mbs = B->rmap->N/bs;
1587   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1588 
1589   B->rmap->bs  = bs;
1590   b->bs2 = bs*bs;
1591   b->mbs = mbs;
1592   b->nbs = mbs;
1593   b->Mbs = Mbs;
1594   b->Nbs = Mbs;
1595 
1596   for (i=0; i<=b->size; i++) {
1597     b->rangebs[i] = B->rmap->range[i]/bs;
1598   }
1599   b->rstartbs = B->rmap->rstart/bs;
1600   b->rendbs   = B->rmap->rend/bs;
1601 
1602   b->cstartbs = B->cmap->rstart/bs;
1603   b->cendbs   = B->cmap->rend/bs;
1604 
1605   if (!B->preallocated) {
1606     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1607     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1608     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1609     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1610     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1611     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1612     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1613     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1614     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1615   }
1616 
1617   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1618   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1619   B->preallocated = PETSC_TRUE;
1620   PetscFunctionReturn(0);
1621 }
1622 EXTERN_C_END
1623 
1624 EXTERN_C_BEGIN
1625 #undef __FUNCT__
1626 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1627 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1628 {
1629   PetscInt       m,rstart,cstart,cend;
1630   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1631   const PetscInt *JJ=0;
1632   PetscScalar    *values=0;
1633   PetscErrorCode ierr;
1634 
1635   PetscFunctionBegin;
1636   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1637   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1638   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1639   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1640   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1641   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1642   m      = B->rmap->n/bs;
1643   rstart = B->rmap->rstart/bs;
1644   cstart = B->cmap->rstart/bs;
1645   cend   = B->cmap->rend/bs;
1646 
1647   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1648   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
1649   for (i=0; i<m; i++) {
1650     nz = ii[i+1] - ii[i];
1651     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1652     nz_max = PetscMax(nz_max,nz);
1653     JJ  = jj + ii[i];
1654     for (j=0; j<nz; j++) {
1655       if (*JJ >= cstart) break;
1656       JJ++;
1657     }
1658     d = 0;
1659     for (; j<nz; j++) {
1660       if (*JJ++ >= cend) break;
1661       d++;
1662     }
1663     d_nnz[i] = d;
1664     o_nnz[i] = nz - d;
1665   }
1666   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1667   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1668 
1669   values = (PetscScalar*)V;
1670   if (!values) {
1671     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1672     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1673   }
1674   for (i=0; i<m; i++) {
1675     PetscInt          row    = i + rstart;
1676     PetscInt          ncols  = ii[i+1] - ii[i];
1677     const PetscInt    *icols = jj + ii[i];
1678     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1679     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1680   }
1681 
1682   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1683   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1684   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1686   PetscFunctionReturn(0);
1687 }
1688 EXTERN_C_END
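/*
   A small data sketch of the CSR input the routine above expects (values are
   illustrative only, assuming this process owns block rows 0 and 1 with bs = 2
   and an upper triangular pattern {0,1} in block row 0 and {1} in block row 1):

       PetscInt    ii[] = {0,2,3};    block row pointers; ii[0] must be 0
       PetscInt    jj[] = {0,1,1};    global block column indices
       PetscScalar V[3*2*2];          optional values, bs*bs entries per block

       ierr = MatMPISBAIJSetPreallocationCSR(B,2,ii,jj,V);CHKERRQ(ierr);
*/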
1689 
1690 EXTERN_C_BEGIN
1691 #if defined(PETSC_HAVE_MUMPS)
1692 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1693 #endif
1694 #if defined(PETSC_HAVE_PASTIX)
1695 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1696 #endif
1697 EXTERN_C_END
1698 
1699 /*MC
1700    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1701    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1702    the matrix is stored.
1703 
1704   For complex numbers, by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric
1705   call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE).
1706 
1707    Options Database Keys:
1708 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1709 
1710   Level: beginner
1711 
1712 .seealso: MatCreateMPISBAIJ
1713 M*/
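/*
   A minimal creation sketch for this type (comm, M, and bs are assumed to be
   defined by the caller); on complex builds the final call switches the matrix
   from symmetric to Hermitian semantics:

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
       ierr = MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
       ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
*/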
1714 
1715 EXTERN_C_BEGIN
1716 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1717 EXTERN_C_END
1718 
1719 EXTERN_C_BEGIN
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatCreate_MPISBAIJ"
1722 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1723 {
1724   Mat_MPISBAIJ   *b;
1725   PetscErrorCode ierr;
1726   PetscBool      flg;
1727 
1728   PetscFunctionBegin;
1729   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1730   B->data = (void*)b;
1731   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1732 
1733   B->ops->destroy    = MatDestroy_MPISBAIJ;
1734   B->ops->view       = MatView_MPISBAIJ;
1735   B->assembled       = PETSC_FALSE;
1736 
1737   B->insertmode = NOT_SET_VALUES;
1738   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1739   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1740 
1741   /* build local table of row and column ownerships */
1742   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1743 
1744   /* build cache for off array entries formed */
1745   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1746   b->donotstash  = PETSC_FALSE;
1747   b->colmap      = PETSC_NULL;
1748   b->garray      = PETSC_NULL;
1749   b->roworiented = PETSC_TRUE;
1750 
1751   /* stuff used in block assembly */
1752   b->barray       = 0;
1753 
1754   /* stuff used for matrix vector multiply */
1755   b->lvec         = 0;
1756   b->Mvctx        = 0;
1757   b->slvec0       = 0;
1758   b->slvec0b      = 0;
1759   b->slvec1       = 0;
1760   b->slvec1a      = 0;
1761   b->slvec1b      = 0;
1762   b->sMvctx       = 0;
1763 
1764   /* stuff for MatGetRow() */
1765   b->rowindices   = 0;
1766   b->rowvalues    = 0;
1767   b->getrowactive = PETSC_FALSE;
1768 
1769   /* hash table stuff */
1770   b->ht           = 0;
1771   b->hd           = 0;
1772   b->ht_size      = 0;
1773   b->ht_flag      = PETSC_FALSE;
1774   b->ht_fact      = 0;
1775   b->ht_total_ct  = 0;
1776   b->ht_insert_ct = 0;
1777 
1778   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1779   b->ijonly       = PETSC_FALSE;
1780 
1781   b->in_loc       = 0;
1782   b->v_loc        = 0;
1783   b->n_loc        = 0;
1784   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1785     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1786     if (flg) {
1787       PetscReal fact = 1.39;
1788       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1789       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1790       if (fact <= 1.0) fact = 1.39;
1791       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1792       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1793     }
1794   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1795 
1796 #if defined(PETSC_HAVE_PASTIX)
1797   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1798                                            "MatGetFactor_mpisbaij_pastix",
1799                                            MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1800 #endif
1801 #if defined(PETSC_HAVE_MUMPS)
1802   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1803                                      "MatGetFactor_sbaij_mumps",
1804                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1805 #endif
1806   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1807                                      "MatStoreValues_MPISBAIJ",
1808                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1809   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1810                                      "MatRetrieveValues_MPISBAIJ",
1811                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1813                                      "MatGetDiagonalBlock_MPISBAIJ",
1814                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1816                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1817                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1819                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1820                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1822                                      "MatConvert_MPISBAIJ_MPISBSTRM",
1823                                       MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1824 
1825   B->symmetric                  = PETSC_TRUE;
1826   B->structurally_symmetric     = PETSC_TRUE;
1827   B->symmetric_set              = PETSC_TRUE;
1828   B->structurally_symmetric_set = PETSC_TRUE;
1829   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1830   PetscFunctionReturn(0);
1831 }
1832 EXTERN_C_END
1833 
1834 /*MC
1835    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1836 
1837    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1838    and MATMPISBAIJ otherwise.
1839 
1840    Options Database Keys:
1841 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1842 
1843   Level: beginner
1844 
1845 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1846 M*/
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1850 /*@C
1851    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1852    the user should preallocate the matrix storage by setting the parameters
1853    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1854    performance can be increased by more than a factor of 50.
1855 
1856    Collective on Mat
1857 
1858    Input Parameters:
1859 +  A - the matrix
1860 .  bs   - size of block
1861 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1862            submatrix  (same for all local rows)
1863 .  d_nnz - array containing the number of block nonzeros in the upper triangular and diagonal
1864            part of each block row of the diagonal portion of the local submatrix
1865            (possibly different for each block row) or PETSC_NULL.  If you plan to factor the matrix you must leave room
1866            for the diagonal entry and set a value even if it is zero.
1867 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1868            submatrix (same for all local rows).
1869 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1870            off-diagonal portion of the local submatrix that is to the right of the diagonal
1871            (possibly different for each block row) or PETSC_NULL.
1872 
1873 
1874    Options Database Keys:
1875 .   -mat_no_unroll - uses code that does not unroll the loops in the
1876                      block calculations (much slower)
1877 .   -mat_block_size - size of the blocks to use
1878 
1879    Notes:
1880 
1881    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1882    then it must be used on all processors that share the object for that argument.
1883 
1884    If the *_nnz parameter is given then the *_nz parameter is ignored
1885 
1886    Storage Information:
1887    For a square global matrix we define each processor's diagonal portion
1888    to be its local rows and the corresponding columns (a square submatrix);
1889    each processor's off-diagonal portion encompasses the remainder of the
1890    local matrix (a rectangular submatrix).
1891 
1892    The user can specify preallocated storage for the diagonal part of
1893    the local submatrix with either d_nz or d_nnz (not both).  Set
1894    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1895    memory allocation.  Likewise, specify preallocated storage for the
1896    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1897 
1898    You can call MatGetInfo() to get information on how effective the preallocation was,
1899    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1900    You can also run with the option -info and look for messages with the string
1901    malloc in them to see if additional memory allocation was needed.
1902 
1903    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1904    the figure below we depict these three local rows and all columns (0-11).
1905 
1906 .vb
1907            0 1 2 3 4 5 6 7 8 9 10 11
1908           -------------------
1909    row 3  |  . . . d d d o o o o o o
1910    row 4  |  . . . d d d o o o o o o
1911    row 5  |  . . . d d d o o o o o o
1912           -------------------
1913 .ve
1914 
1915    Thus, any entries in the d locations are stored in the d (diagonal)
1916    submatrix, and any entries in the o locations are stored in the
1917    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1918    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1919 
1920    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1921    plus the diagonal part of the d matrix,
1922    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1923 
1924    In general, for PDE problems in which most nonzeros are near the diagonal,
1925    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1926    or you will get TERRIBLE performance; see the users' manual chapter on
1927    matrices.
1928 
1929    Level: intermediate
1930 
1931 .keywords: matrix, block, aij, compressed row, sparse, parallel
1932 
1933 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1934 @*/
1935 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1936 {
1937   PetscErrorCode ierr;
1938 
1939   PetscFunctionBegin;
1940   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1941   PetscValidType(B,1);
1942   PetscValidLogicalCollectiveInt(B,bs,2);
1943   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1944   PetscFunctionReturn(0);
1945 }
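/*
   A preallocation sketch matching the figure in the manpage above (one process
   owning rows 3-5 of a 12x12 matrix with bs = 1): the d part stores only entries
   on or to the right of the diagonal, so the three local rows contribute 3, 2,
   and 1 diagonal-part entries while each keeps all 6 off-diagonal-part entries,

       PetscInt d_nnz[3] = {3,2,1};
       PetscInt o_nnz[3] = {6,6,6};
       ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/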
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatCreateSBAIJ"
1949 /*@C
1950    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1951    (block compressed row).  For good matrix assembly performance
1952    the user should preallocate the matrix storage by setting the parameters
1953    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1954    performance can be increased by more than a factor of 50.
1955 
1956    Collective on MPI_Comm
1957 
1958    Input Parameters:
1959 +  comm - MPI communicator
1960 .  bs   - size of block
1961 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1962            This value should be the same as the local size used in creating the
1963            y vector for the matrix-vector product y = Ax.
1964 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1965            This value should be the same as the local size used in creating the
1966            x vector for the matrix-vector product y = Ax.
1967 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1968 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1969 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1970            submatrix  (same for all local rows)
1971 .  d_nnz - array containing the number of block nonzeros in the upper triangular and diagonal
1972            part of each block row of the diagonal portion of the local submatrix
1973            (possibly different for each block row) or PETSC_NULL.
1974            If you plan to factor the matrix you must leave room for the diagonal entry and
1975            set its value even if it is zero.
1976 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1977            submatrix (same for all local rows).
1978 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1979            off-diagonal portion of the local submatrix (possibly different for
1980            each block row) or PETSC_NULL.
1981 
1982    Output Parameter:
1983 .  A - the matrix
1984 
1985    Options Database Keys:
1986 .   -mat_no_unroll - uses code that does not unroll the loops in the
1987                      block calculations (much slower)
1988 .   -mat_block_size - size of the blocks to use
1989 .   -mat_mpi - use the parallel matrix data structures even on one processor
1990                (defaults to using SeqSBAIJ format on one processor)
1991 
1992    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1993    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1994    [MatXXXXSetPreallocation() is, for example, MatMPISBAIJSetPreallocation()]
1995 
1996    Notes:
1997    The number of rows and columns must be divisible by blocksize.
1998    This matrix type does not support complex Hermitian operations.
1999 
2000    The user MUST specify either the local or global matrix dimensions
2001    (possibly both).
2002 
2003    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2004    then it must be used on all processors that share the object for that argument.
2005 
2006    If the *_nnz parameter is given then the *_nz parameter is ignored
2007 
2008    Storage Information:
2009    For a square global matrix we define each processor's diagonal portion
2010    to be its local rows and the corresponding columns (a square submatrix);
2011    each processor's off-diagonal portion encompasses the remainder of the
2012    local matrix (a rectangular submatrix).
2013 
2014    The user can specify preallocated storage for the diagonal part of
2015    the local submatrix with either d_nz or d_nnz (not both).  Set
2016    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2017    memory allocation.  Likewise, specify preallocated storage for the
2018    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2019 
2020    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2021    the figure below we depict these three local rows and all columns (0-11).
2022 
2023 .vb
2024            0 1 2 3 4 5 6 7 8 9 10 11
2025           -------------------
2026    row 3  |  . . . d d d o o o o o o
2027    row 4  |  . . . d d d o o o o o o
2028    row 5  |  . . . d d d o o o o o o
2029           -------------------
2030 .ve
2031 
2032    Thus, any entries in the d locations are stored in the d (diagonal)
2033    submatrix, and any entries in the o locations are stored in the
2034    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2035    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2036 
2037    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2038    plus the diagonal part of the d matrix,
2039    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2040    In general, for PDE problems in which most nonzeros are near the diagonal,
2041    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2042    or you will get TERRIBLE performance; see the users' manual chapter on
2043    matrices.
2044 
2045    Level: intermediate
2046 
2047 .keywords: matrix, block, aij, compressed row, sparse, parallel
2048 
2049 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2050 @*/
2051 
2052 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2053 {
2054   PetscErrorCode ierr;
2055   PetscMPIInt    size;
2056 
2057   PetscFunctionBegin;
2058   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2059   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2060   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2061   if (size > 1) {
2062     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2063     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2064   } else {
2065     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2066     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2067   }
2068   PetscFunctionReturn(0);
2069 }
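/*
   A one-call creation sketch (comm is assumed; the sizes and nonzero estimates
   are illustrative): a 100x100 matrix with bs = 2, guessing 5 diagonal-part and
   2 off-diagonal-part block nonzeros per block row on every process,

       Mat A;
       ierr = MatCreateSBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/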
2070 
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2074 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2075 {
2076   Mat            mat;
2077   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2078   PetscErrorCode ierr;
2079   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2080   PetscScalar    *array;
2081 
2082   PetscFunctionBegin;
2083   *newmat       = 0;
2084   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2085   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2086   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2087   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2088   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2089   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2090 
2091   mat->factortype   = matin->factortype;
2092   mat->preallocated = PETSC_TRUE;
2093   mat->assembled    = PETSC_TRUE;
2094   mat->insertmode   = NOT_SET_VALUES;
2095 
2096   a = (Mat_MPISBAIJ*)mat->data;
2097   a->bs2   = oldmat->bs2;
2098   a->mbs   = oldmat->mbs;
2099   a->nbs   = oldmat->nbs;
2100   a->Mbs   = oldmat->Mbs;
2101   a->Nbs   = oldmat->Nbs;
2102 
2103 
2104   a->size         = oldmat->size;
2105   a->rank         = oldmat->rank;
2106   a->donotstash   = oldmat->donotstash;
2107   a->roworiented  = oldmat->roworiented;
2108   a->rowindices   = 0;
2109   a->rowvalues    = 0;
2110   a->getrowactive = PETSC_FALSE;
2111   a->barray       = 0;
2112   a->rstartbs    = oldmat->rstartbs;
2113   a->rendbs      = oldmat->rendbs;
2114   a->cstartbs    = oldmat->cstartbs;
2115   a->cendbs      = oldmat->cendbs;
2116 
2117   /* hash table stuff */
2118   a->ht           = 0;
2119   a->hd           = 0;
2120   a->ht_size      = 0;
2121   a->ht_flag      = oldmat->ht_flag;
2122   a->ht_fact      = oldmat->ht_fact;
2123   a->ht_total_ct  = 0;
2124   a->ht_insert_ct = 0;
2125 
2126   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2127   if (oldmat->colmap) {
2128 #if defined (PETSC_USE_CTABLE)
2129     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2130 #else
2131     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2132     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2133     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2134 #endif
2135   } else a->colmap = 0;
2136 
2137   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2138     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2139     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2140     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2141   } else a->garray = 0;
2142 
2143   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2144   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2146   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2147   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2148 
2149   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2150   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2151   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2152   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2153 
2154   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2155   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2156   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2157   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2158   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2159   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2160   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2161   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2162   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2163   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2164   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2165   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2166   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2167 
2168   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2169   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2170   a->sMvctx = oldmat->sMvctx;
2171   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2172 
2173   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2174   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2175   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2176   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2177   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2178   *newmat = mat;
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatLoad_MPISBAIJ"
2184 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2185 {
2186   PetscErrorCode ierr;
2187   PetscInt       i,nz,j,rstart,rend;
2188   PetscScalar    *vals,*buf;
2189   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2190   MPI_Status     status;
2191   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2192   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2193   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2194   PetscInt       bs=1,Mbs,mbs,extra_rows;
2195   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2196   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2197   int            fd;
2198 
2199   PetscFunctionBegin;
2200   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2201     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2202   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2203 
2204   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2205   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2206   if (!rank) {
2207     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2208     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2209     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2210     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2211   }
2212 
2213   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2214 
2215   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2216   M = header[1]; N = header[2];
2217 
2218   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2219   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2220   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2221 
2222   /* If global sizes are set, check if they are consistent with that given in the file */
2223   if (sizesset) {
2224     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2225   }
2226   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
2227   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
2228 
2229   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2230 
2231   /*
2232      This code adds extra rows to make sure the number of rows is
2233      divisible by the blocksize
2234   */
2235   Mbs        = M/bs;
2236   extra_rows = bs - M + bs*(Mbs);
2237   if (extra_rows == bs) extra_rows = 0;
2238   else                  Mbs++;
2239   if (extra_rows && !rank) {
2240     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2241   }
2242 
2243   /* determine ownership of all rows */
2244   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2245     mbs        = Mbs/size + ((Mbs % size) > rank);
2246     m          = mbs*bs;
2247   } else { /* User Set */
2248     m          = newmat->rmap->n;
2249     mbs        = m/bs;
2250   }
2251   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2252   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2253   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2254   rowners[0] = 0;
2255   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2256   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2257   rstart = rowners[rank];
2258   rend   = rowners[rank+1];
2259 
2260   /* distribute row lengths to all processors */
2261   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2262   if (!rank) {
2263     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2264     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2265     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2266     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2267     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2268     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2269     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2270   } else {
2271     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2272   }
2273 
2274   if (!rank) {   /* procs[0] */
2275     /* calculate the number of nonzeros on each processor */
2276     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2277     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2278     for (i=0; i<size; i++) {
2279       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2280         procsnz[i] += rowlengths[j];
2281       }
2282     }
2283     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2284 
2285     /* determine max buffer needed and allocate it */
2286     maxnz = 0;
2287     for (i=0; i<size; i++) {
2288       maxnz = PetscMax(maxnz,procsnz[i]);
2289     }
2290     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2291 
2292     /* read in my part of the matrix column indices  */
2293     nz     = procsnz[0];
2294     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2295     mycols = ibuf;
2296     if (size == 1)  nz -= extra_rows;
2297     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2298     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2299 
2300     /* read in all other parts (except the last proc's) and ship them off */
2301     for (i=1; i<size-1; i++) {
2302       nz   = procsnz[i];
2303       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2304       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2305     }
2306     /* read in the stuff for the last proc */
2307     if (size != 1) {
2308       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2309       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2310       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2311       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2312     }
2313     ierr = PetscFree(cols);CHKERRQ(ierr);
2314   } else {  /* procs[i], i>0 */
2315     /* determine buffer space needed for message */
2316     nz = 0;
2317     for (i=0; i<m; i++) {
2318       nz += locrowlens[i];
2319     }
2320     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2321     mycols = ibuf;
2322     /* receive message of column indices */
2323     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2324     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2325     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2326   }
2327 
2328   /* loop over local rows, determining number of off diagonal entries */
2329   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2330   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2331   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2332   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2333   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2334   rowcount = 0;
2335   nzcount  = 0;
2336   for (i=0; i<mbs; i++) {
2337     dcount  = 0;
2338     odcount = 0;
2339     for (j=0; j<bs; j++) {
2340       kmax = locrowlens[rowcount];
2341       for (k=0; k<kmax; k++) {
2342         tmp = mycols[nzcount++]/bs; /* block col. index */
2343         if (!mask[tmp]) {
2344           mask[tmp] = 1;
2345           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2346           else masked1[dcount++] = tmp; /* entry in diag portion */
2347         }
2348       }
2349       rowcount++;
2350     }
2351 
2352     dlens[i]  = dcount;  /* d_nzz[i] */
2353     odlens[i] = odcount; /* o_nzz[i] */
2354 
2355     /* zero out the mask elements we set */
2356     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2357     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2358   }
2359   if (!sizesset) {
2360     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2361   }
2362   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2363   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2364 
2365   if (!rank) {
2366     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2367     /* read in my part of the matrix numerical values  */
2368     nz = procsnz[0];
2369     vals = buf;
2370     mycols = ibuf;
2371     if (size == 1)  nz -= extra_rows;
2372     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2373     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2374 
2375     /* insert into matrix */
2376     jj      = rstart*bs;
2377     for (i=0; i<m; i++) {
2378       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2379       mycols += locrowlens[i];
2380       vals   += locrowlens[i];
2381       jj++;
2382     }
2383 
2384     /* read in other processors (except the last one) and ship out */
2385     for (i=1; i<size-1; i++) {
2386       nz   = procsnz[i];
2387       vals = buf;
2388       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2389       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2390     }
2391     /* the last proc */
2392     if (size != 1) {
2393       nz   = procsnz[size-1] - extra_rows;  /* i == size-1 here; the extra rows are not on the disk */
2394       vals = buf;
2395       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2396       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2397       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2398     }
2399     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2400 
2401   } else {
2402     /* receive numeric values */
2403     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2404 
2405     /* receive message of values */
2406     vals   = buf;
2407     mycols = ibuf;
2408     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2409     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2410     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2411 
2412     /* insert into matrix */
2413     jj      = rstart*bs;
2414     for (i=0; i<m; i++) {
2415       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2416       mycols += locrowlens[i];
2417       vals   += locrowlens[i];
2418       jj++;
2419     }
2420   }
2421 
2422   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2423   ierr = PetscFree(buf);CHKERRQ(ierr);
2424   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2425   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2426   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2427   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2428   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2429   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2430   PetscFunctionReturn(0);
2431 }
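/*
   A loading sketch (the file name "matrix.dat" is hypothetical); once the type
   is set, MatLoad() dispatches to the routine above:

       Mat         A;
       PetscViewer fd;
       ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
       ierr = MatLoad(A,fd);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
*/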
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2435 /*XXXXX@
2436    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2437 
2438    Input Parameters:
2439 .  mat  - the matrix
2440 .  fact - factor
2441 
2442    Not Collective on Mat, each process can have a different hash factor
2443 
2444    Level: advanced
2445 
2446   Notes:
2447    This can also be set by the command line option: -mat_use_hash_table fact
2448 
2449 .keywords: matrix, hashtable, factor, HT
2450 
2451 .seealso: MatSetOption()
2452 @XXXXX*/
2453 
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2457 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2458 {
2459   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2460   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2461   PetscReal      atmp;
2462   PetscReal      *work,*svalues,*rvalues;
2463   PetscErrorCode ierr;
2464   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2465   PetscMPIInt    rank,size;
2466   PetscInt       *rowners_bs,dest,count,source;
2467   PetscScalar    *va;
2468   MatScalar      *ba;
2469   MPI_Status     stat;
2470 
2471   PetscFunctionBegin;
2472   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2473   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2474   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2475 
2476   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2477   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2478 
2479   bs   = A->rmap->bs;
2480   mbs  = a->mbs;
2481   Mbs  = a->Mbs;
2482   ba   = b->a;
2483   bi   = b->i;
2484   bj   = b->j;
2485 
2486   /* find ownerships */
2487   rowners_bs = A->rmap->range;
2488 
2489   /* each proc creates an array to be distributed */
2490   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2491   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2492 
2493   /* row_max for B */
2494   if (rank != size-1) {
2495     for (i=0; i<mbs; i++) {
2496       ncols = bi[1] - bi[0]; bi++;
2497       brow  = bs*i;
2498       for (j=0; j<ncols; j++) {
2499         bcol = bs*(*bj);
2500         for (kcol=0; kcol<bs; kcol++) {
2501           col = bcol + kcol;                 /* local col index */
2502           col += rowners_bs[rank+1];      /* global col index */
2503           for (krow=0; krow<bs; krow++) {
2504             atmp = PetscAbsScalar(*ba); ba++;
2505             row = brow + krow;    /* local row index */
2506             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2507             if (work[col] < atmp) work[col] = atmp;
2508           }
2509         }
2510         bj++;
2511       }
2512     }
2513 
2514     /* send values to its owners */
2515     for (dest=rank+1; dest<size; dest++) {
2516       svalues = work + rowners_bs[dest];
2517       count   = rowners_bs[dest+1]-rowners_bs[dest];
2518       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2519     }
2520   }
2521 
2522   /* receive values */
2523   if (rank) {
2524     rvalues = work;
2525     count   = rowners_bs[rank+1]-rowners_bs[rank];
2526     for (source=0; source<rank; source++) {
2527       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2528       /* process values */
2529       for (i=0; i<count; i++) {
2530         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2531       }
2532     }
2533   }
2534 
2535   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2536   ierr = PetscFree(work);CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatSOR_MPISBAIJ"
2542 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2543 {
2544   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2545   PetscErrorCode    ierr;
2546   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2547   PetscScalar       *x,*ptr,*from;
2548   Vec               bb1;
2549   const PetscScalar *b;
2550 
2551   PetscFunctionBegin;
2552   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2553   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2554 
2555   if (flag == SOR_APPLY_UPPER) {
2556     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2557     PetscFunctionReturn(0);
2558   }
2559 
2560   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2561     if (flag & SOR_ZERO_INITIAL_GUESS) {
2562       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2563       its--;
2564     }
2565 
2566     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2567     while (its--) {
2568 
2569       /* lower triangular part: slvec0b = B^T*xx (slvec0 is negated below) */
2570       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2571 
2572       /* copy xx into slvec0a */
2573       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2574       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2575       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2576       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2577 
2578       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2579 
2580       /* copy bb into slvec1a */
2581       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2582       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2583       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2584       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2585 
2586       /* set slvec1b = 0 */
2587       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2588 
2589       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2590       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2591       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2592       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2593 
2594       /* upper triangular part: bb1 = bb1 - B*x */
2595       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2596 
2597       /* local diagonal sweep */
2598       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2599     }
2600     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2601   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2602     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2603   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2604     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2605   } else if (flag & SOR_EISENSTAT) {
2606     Vec               xx1;
2607     PetscBool         hasop;
2608     const PetscScalar *diag;
2609     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2610     PetscInt          i,n;
2611 
2612     if (!mat->xx1) {
2613       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2614       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2615     }
2616     xx1 = mat->xx1;
2617     bb1 = mat->bb1;
2618 
2619     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2620 
2621     if (!mat->diag) {
2622       /* this is wrong for same matrix with new nonzero values */
2623       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2624       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2625     }
2626     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2627 
2628     if (hasop) {
2629       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2630       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2631     } else {
2632       /*
2633           These two lines are replaced by code that may be a bit faster for a good compiler
2634       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2635       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2636       */
2637       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2638       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2639       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2640       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2641       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2642       if (omega == 1.0) {
2643         for (i=0; i<n; i++) {
2644           sl[i] = b[i] - diag[i]*x[i];
2645         }
2646         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2647       } else {
2648         for (i=0; i<n; i++) {
2649           sl[i] = b[i] + scale*diag[i]*x[i];
2650         }
2651         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2652       }
2653       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2654       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2655       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2656       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2657     }
2658 
2659     /* multiply off-diagonal portion of matrix */
2660     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2661     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2662     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2663     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* from and x are PetscScalar arrays */
2665     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2666     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2667     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2668     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2669     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2670 
2671     /* local sweep */
2672     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2673     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2674   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2675   PetscFunctionReturn(0);
2676 }
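
/*
   Usage note (a hypothetical driver sketch, not code from this file): these
   SOR routines are not called directly; they are dispatched through MatSOR(),
   for example

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   or selected at runtime with -pc_type sor, in which case PCApply() invokes
   them through the Mat operations table.
*/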
2677 
2678 #undef __FUNCT__
2679 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2680 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2681 {
2682   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2683   PetscErrorCode ierr;
2684   Vec            lvec1,bb1;
2685 
2686   PetscFunctionBegin;
2687   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2688   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2689 
2690   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2691     if (flag & SOR_ZERO_INITIAL_GUESS) {
2692       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2693       its--;
2694     }
2695 
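    /*
       Two-communication variant (intent, as I read it): rather than the
       merged slvec0/slvec1 scatter of MatSOR_MPISBAIJ, this version
       forward-scatters x into lvec and reverse-scatters -B^T*x into bb1
       through the separate Mvctx scatter, overlapping each transfer with
       the local B^T*x and B*x products.
    */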
2696     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2697     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2698     while (its--) {
2699       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2700 
      /* lower triangular part: lvec1 = -B^T*xx, reverse-scattered into bb1 = bb below */
2702       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2703       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2704 
2705       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2706       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2707       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2708 
      /* upper triangular part: bb1 = bb1 - B*x */
2710       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2711       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2712 
2713       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2714 
2715       /* diagonal sweep */
2716       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2717     }
2718     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2719     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2720   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2721   PetscFunctionReturn(0);
2722 }
2723 
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2726 /*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
         in standard CSR format.
2729 
2730    Collective on MPI_Comm
2731 
2732    Input Parameters:
2733 +  comm - MPI communicator
.  bs - the block size (only a block size of 1 is currently supported)
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given); for square matrices n is almost always m
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values
2744 
2745    Output Parameter:
2746 .   mat - the matrix
2747 
2748    Level: intermediate
2749 
2750    Notes:
2751        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2752      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2753      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2754 
       The i and j indices are 0 based, and the i entries are offsets into the local j array.
2756 
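   Example usage (a minimal sketch; the tiny upper-triangle CSR below is
   illustrative, assumes a single process with bs = 1, and relies on SBAIJ
   storing only the upper triangle including the diagonal):
.vb
   PetscInt    i[] = {0,2,3};         /* row offsets into j[] */
   PetscInt    j[] = {0,1,1};         /* sorted column indices per row */
   PetscScalar a[] = {4.0,-1.0,4.0};  /* upper triangle of a symmetric 2x2 matrix */
   Mat         A;
   ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve
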
.keywords: matrix, sbaij, compressed row, sparse, parallel, symmetric
2758 
2759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2760           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2761 @*/
2762 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2763 {
2764   PetscErrorCode ierr;

  PetscFunctionBegin;
2768   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
2770   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2771   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2772   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2773   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2780 /*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (symmetric block compressed row).
2783 
2784    Collective on MPI_Comm
2785 
2786    Input Parameters:
2787 +  A - the matrix
2788 .  bs - the block size
2789 .  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted within each row
2791 -  v - optional values in the matrix
2792 
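   Example (a minimal sketch; assumes B already has type MATMPISBAIJ with its
   local sizes set, one process, bs = 1, and upper-triangle CSR input as in
   the MatCreateMPISBAIJWithArrays() example above):
.vb
   PetscInt    i[] = {0,2,3};
   PetscInt    j[] = {0,1,1};
   PetscScalar v[] = {4.0,-1.0,4.0};
   ierr = MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);CHKERRQ(ierr);
.ve
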
2793    Level: developer
2794 
.keywords: matrix, sbaij, compressed row, sparse, parallel, symmetric
2796 
2797 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2798 @*/
2799 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2800 {
2801   PetscErrorCode ierr;
2802 
2803   PetscFunctionBegin;
2804   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 
2809