xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision d45987f3cb8da7dca4e7c09f0fedfc3d8f6afad7)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
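/*
   Both macros above address entries in block-compressed (SBAIJ/BAIJ) storage:
   a point entry (row,col) lives in block (brow,bcol) = (row/bs,col/bs), at
   offset bs*cidx + ridx inside the bs x bs block, which is stored column-major.
   A small worked example (illustration only): with bs = 2, (row,col) = (5,7)
   maps to block (2,3) with (ridx,cidx) = (1,1); if that block is the _i-th
   block stored in its row, the value lands at ap[bs2*_i + 2*1 + 1].
*/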
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored.
133 */
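/*
   A minimal usage sketch (illustration only, not part of this file; "mat" and
   "ierr" are assumed to be declared by the caller): only upper triangular
   entries are passed in, and with MAT_IGNORE_LOWER_TRIANGULAR set any lower
   triangular input would be dropped silently instead of raising an error.

     PetscInt    i = 0, j = 2;        (global indices with i <= j)
     PetscScalar v = 1.0;
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/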
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some variables required by the macros */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash){
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs){
189           if (a->ignore_ltriangular){
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined (PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
221               B = baij->B;
222               b = (Mat_SeqBAIJ*)(B)->data;
223               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba=b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         mat->assembled = PETSC_FALSE;
236         n_loc = 0;
237         for (j=0; j<n; j++){
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray=baij->barray;
261   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if (!barray) {
269     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (im[i] > in[j]) {
287           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
288           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
289         }
290         /* If n == 1 (row-oriented) or m == 1 (column-oriented), the block can be used in place and no copy is required */
291         if ((roworiented) && (n == 1)) {
292           barray = (MatScalar*) v + i*bs2;
293         } else if ((!roworiented) && (m == 1)) {
294           barray = (MatScalar*) v + j*bs2;
295         } else { /* Here a copy is required */
296           if (roworiented) {
297             value = v + i*(stepval+bs)*bs + j*bs;
298           } else {
299             value = v + j*(stepval+bs)*bs + i*bs;
300           }
301           for (ii=0; ii<bs; ii++,value+=stepval) {
302             for (jj=0; jj<bs; jj++) {
303               *barray++  = *value++;
304             }
305           }
306           barray -= bs2;
307         }
308 
309         if (in[j] >= cstart && in[j] < cend){
310           col  = in[j] - cstart;
311           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
312         }
313         else if (in[j] < 0) continue;
314 #if defined(PETSC_USE_DEBUG)
315         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
316 #endif
317         else {
318           if (mat->was_assembled) {
319             if (!baij->colmap) {
320               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
321             }
322 
323 #if defined(PETSC_USE_DEBUG)
324 #if defined (PETSC_USE_CTABLE)
325             { PetscInt data;
326               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
327               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
328             }
329 #else
330             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
331 #endif
332 #endif
333 #if defined (PETSC_USE_CTABLE)
334 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
335             col  = (col - 1)/bs;
336 #else
337             col = (baij->colmap[in[j]] - 1)/bs;
338 #endif
339             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
340               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
341               col =  in[j];
342             }
343           }
344           else col = in[j];
345           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
346         }
347       }
348     } else {
349       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
350       if (!baij->donotstash) {
351         if (roworiented) {
352           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         } else {
354           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
355         }
356       }
357     }
358   }
359   PetscFunctionReturn(0);
360 }
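/*
   A minimal sketch of blocked insertion (illustration only; assumes bs = 2, a
   row-oriented matrix "mat", and block indices I <= J): the bs*bs values are
   laid out row by row.

     PetscInt    I = 0, J = 1;
     PetscScalar block[4] = {1.0, 2.0,
                             3.0, 4.0};
     ierr = MatSetValuesBlocked(mat,1,&I,1,&J,block,INSERT_VALUES);CHKERRQ(ierr);
*/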
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatGetValues_MPISBAIJ"
364 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
365 {
366   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
367   PetscErrorCode ierr;
368   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
369   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
370 
371   PetscFunctionBegin;
372   for (i=0; i<m; i++) {
373     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
374     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
375     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
376       row = idxm[i] - bsrstart;
377       for (j=0; j<n; j++) {
378         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
379         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
380         if (idxn[j] >= bscstart && idxn[j] < bscend){
381           col = idxn[j] - bscstart;
382           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
383         } else {
384           if (!baij->colmap) {
385             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
386           }
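          /* colmap maps a global block column of B to 1 + bs*(local block column)
             in the compact off-process numbering, with 0 meaning "not present on
             this process" (hence data < 0 after the decrement below); garray is
             the inverse map from local block columns back to global ones. */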
387 #if defined (PETSC_USE_CTABLE)
388           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
389           data --;
390 #else
391           data = baij->colmap[idxn[j]/bs]-1;
392 #endif
393           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
394           else {
395             col  = data + idxn[j]%bs;
396             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
397           }
398         }
399       }
400     } else {
401       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
402     }
403   }
404   PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatNorm_MPISBAIJ"
409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
410 {
411   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscReal      sum[2],*lnorm2;
414 
415   PetscFunctionBegin;
416   if (baij->size == 1) {
417     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
418   } else {
419     if (type == NORM_FROBENIUS) {
420       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
421       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
422       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
423       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
424       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
425       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
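      /* sum[0] = sum_p ||A_p||_F^2 and sum[1] = sum_p ||B_p||_F^2; each stored B
         block represents two entries of the full symmetric matrix, hence the
         factor 2 below (the SeqSBAIJ norm of A is assumed to already account
         for the unstored lower triangle of the diagonal block) */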
426       *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
427       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
428     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
429       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
430       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
431       PetscReal    *rsum,*rsum2,vabs;
432       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
433       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
434       MatScalar    *v;
435 
436       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
437       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
438       /* Amat */
439       v = amat->a; jj = amat->j;
440       for (brow=0; brow<mbs; brow++) {
441         grow = bs*(rstart + brow);
442         nz = amat->i[brow+1] - amat->i[brow];
443         for (bcol=0; bcol<nz; bcol++){
444           gcol = bs*(rstart + *jj); jj++;
445           for (col=0; col<bs; col++){
446             for (row=0; row<bs; row++){
447               vabs = PetscAbsScalar(*v); v++;
448               rsum[gcol+col] += vabs;
449               /* non-diagonal block */
450               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
451             }
452           }
453         }
454       }
455       /* Bmat */
456       v = bmat->a; jj = bmat->j;
457       for (brow=0; brow<mbs; brow++) {
458         grow = bs*(rstart + brow);
459         nz = bmat->i[brow+1] - bmat->i[brow];
460         for (bcol=0; bcol<nz; bcol++){
461           gcol = bs*garray[*jj]; jj++;
462           for (col=0; col<bs; col++){
463             for (row=0; row<bs; row++){
464               vabs = PetscAbsScalar(*v); v++;
465               rsum[gcol+col] += vabs;
466               rsum[grow+row] += vabs;
467             }
468           }
469         }
470       }
471       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
472       *norm = 0.0;
473       for (col=0; col<mat->cmap->N; col++) {
474         if (rsum2[col] > *norm) *norm = rsum2[col];
475       }
476       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
477     } else {
478       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
479     }
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
486 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
487 {
488   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
489   PetscErrorCode ierr;
490   PetscInt       nstash,reallocs;
491   InsertMode     addv;
492 
493   PetscFunctionBegin;
494   if (baij->donotstash || mat->nooffprocentries) {
495     PetscFunctionReturn(0);
496   }
497 
498   /* make sure all processors are either in INSERTMODE or ADDMODE */
499   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
500   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
501   mat->insertmode = addv; /* in case this processor had no cache */
502 
503   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
504   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
508   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
514 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
515 {
516   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
517   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
518   PetscErrorCode ierr;
519   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
520   PetscInt       *row,*col;
521   PetscBool      other_disassembled;
522   PetscMPIInt    n;
523   PetscBool      r1,r2,r3;
524   MatScalar      *val;
525   InsertMode     addv = mat->insertmode;
526 
527   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
528   PetscFunctionBegin;
529 
530   if (!baij->donotstash &&  !mat->nooffprocentries) {
531     while (1) {
532       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
533       if (!flg) break;
534 
535       for (i=0; i<n;) {
536         /* Now identify the consecutive vals belonging to the same row */
537         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
538         if (j < n) ncols = j-i;
539         else       ncols = n-i;
540         /* Now assemble all these values with a single function call */
541         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
542         i = j;
543       }
544     }
545     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
546     /* Now process the block-stash. Since the values are stashed column-oriented,
547        set the roworiented flags to PETSC_FALSE, and restore the original flags
548        after MatSetValues() */
549     r1 = baij->roworiented;
550     r2 = a->roworiented;
551     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
552     baij->roworiented = PETSC_FALSE;
553     a->roworiented    = PETSC_FALSE;
554     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
555     while (1) {
556       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
557       if (!flg) break;
558 
559       for (i=0; i<n;) {
560         /* Now identify the consecutive vals belonging to the same row */
561         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
562         if (j < n) ncols = j-i;
563         else       ncols = n-i;
564         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
565         i = j;
566       }
567     }
568     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
569     baij->roworiented = r1;
570     a->roworiented    = r2;
571     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
572   }
573 
574   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
575   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
576 
577   /* determine if any processor has disassembled; if so, we must
578      also disassemble ourselves, in order that we may reassemble. */
579   /*
580      if the nonzero structure of submatrix B cannot change, then we know that
581      no processor disassembled, thus we can skip this step
582   */
583   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
584     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
585     if (mat->was_assembled && !other_disassembled) {
586       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
587     }
588   }
589 
590   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
591     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
592   }
593   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
594   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
595   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
596 
597   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
598   baij->rowvalues = 0;
599 
600   PetscFunctionReturn(0);
601 }
602 
603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
604 #undef __FUNCT__
605 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
606 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
607 {
608   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
609   PetscErrorCode    ierr;
610   PetscInt          bs = mat->rmap->bs;
611   PetscMPIInt       size = baij->size,rank = baij->rank;
612   PetscBool         iascii,isdraw;
613   PetscViewer       sviewer;
614   PetscViewerFormat format;
615 
616   PetscFunctionBegin;
617   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
618   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
619   if (iascii) {
620     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
621     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
622       MatInfo info;
623       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
624       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
627       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
629       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
630       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
631       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
633       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
634       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
635       PetscFunctionReturn(0);
636     } else if (format == PETSC_VIEWER_ASCII_INFO) {
637       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
640       PetscFunctionReturn(0);
641     }
642   }
643 
644   if (isdraw) {
645     PetscDraw  draw;
646     PetscBool  isnull;
647     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
648     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
649   }
650 
651   if (size == 1) {
652     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
653     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
654   } else {
655     /* assemble the entire matrix onto the first processor. */
656     Mat          A;
657     Mat_SeqSBAIJ *Aloc;
658     Mat_SeqBAIJ  *Bloc;
659     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
660     MatScalar    *a;
661 
662     /* Should this be the same type as mat? */
663     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
664     if (!rank) {
665       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
666     } else {
667       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
668     }
669     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
670     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
671     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
672     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
673 
674     /* copy over the A part */
675     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
676     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
677     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
678 
679     for (i=0; i<mbs; i++) {
680       rvals[0] = bs*(baij->rstartbs + i);
681       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
682       for (j=ai[i]; j<ai[i+1]; j++) {
683         col = (baij->cstartbs+aj[j])*bs;
684         for (k=0; k<bs; k++) {
685           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
686           col++; a += bs;
687         }
688       }
689     }
690     /* copy over the B part */
691     Bloc = (Mat_SeqBAIJ*)baij->B->data;
692     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
693     for (i=0; i<mbs; i++) {
694 
695       rvals[0] = bs*(baij->rstartbs + i);
696       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
697       for (j=ai[i]; j<ai[i+1]; j++) {
698         col = baij->garray[aj[j]]*bs;
699         for (k=0; k<bs; k++) {
700           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
701           col++; a += bs;
702         }
703       }
704     }
705     ierr = PetscFree(rvals);CHKERRQ(ierr);
706     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
707     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     /*
709        Everyone has to call to draw the matrix since the graphics waits are
710        synchronized across all processors that share the PetscDraw object
711     */
712     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
713     if (!rank) {
714       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
715       /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII() */
716       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
717       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
718     }
719     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
720     ierr = MatDestroy(&A);CHKERRQ(ierr);
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPISBAIJ"
727 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
728 {
729   PetscErrorCode ierr;
730   PetscBool      iascii,isdraw,issocket,isbinary;
731 
732   PetscFunctionBegin;
733   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
734   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
735   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
736   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
737   if (iascii || isdraw || issocket || isbinary) {
738     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
739   } else {
740     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatDestroy_MPISBAIJ"
747 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
748 {
749   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753 #if defined(PETSC_USE_LOG)
754   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
755 #endif
756   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
757   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
758   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
760 #if defined (PETSC_USE_CTABLE)
761   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
762 #else
763   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
764 #endif
765   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
767   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
773   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
774   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
775   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
776   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
780 #if defined(PETSC_USE_REAL_MAT_SINGLE)
781   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
782 #endif
783   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
786   ierr = PetscFree(mat->data);CHKERRQ(ierr);
787 
788   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
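/*
   The MatMult routines below need only one forward scatter per product:
   slvec0 is packed as [x ; B^T x] and scattered with ADD_VALUES into
   slvec1 = [A x ; 0], so a single communication both delivers the B^T x
   (lower triangular) contributions to their owning processes and gathers
   the ghost values of x needed for the B (upper triangular) part.
*/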
797 #undef __FUNCT__
798 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
799 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
800 {
801   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
802   PetscErrorCode ierr;
803   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
804   PetscScalar    *x,*from;
805 
806   PetscFunctionBegin;
807   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
808   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
809 
810   /* diagonal part */
811   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
812   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
813 
814   /* subdiagonal part */
815   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
816 
817   /* copy x into the vec slvec0 */
818   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
822   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
823   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
824 
825   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   /* superdiagonal part */
828   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMult_MPISBAIJ"
834 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
835 {
836   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
837   PetscErrorCode ierr;
838   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
839   PetscScalar    *x,*from;
840 
841   PetscFunctionBegin;
842   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
843   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844 
845   /* diagonal part */
846   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
847   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
848 
849   /* subdiagonal part */
850   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
851 
852   /* copy x into the vec slvec0 */
853   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855 
856   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
857   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
858   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
859 
860   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   /* superdiagonal part */
863   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
869 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
870 {
871   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
872   PetscErrorCode ierr;
873   PetscInt       nt;
874 
875   PetscFunctionBegin;
876   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
877   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
878 
879   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
880   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
881 
882   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   /* do diagonal part */
884   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
885   /* do superdiagonal part */
886   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
888   /* do subdiagonal part */
889   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
890   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
891   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892 
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
898 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
901   PetscErrorCode ierr;
902   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
903   PetscScalar    *x,*from,zero=0.0;
904 
905   PetscFunctionBegin;
906   /*
907   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
908   PetscSynchronizedFlush(((PetscObject)A)->comm);
909   */
910   /* diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
912   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
913 
914   /* subdiagonal part */
915   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
916 
917   /* copy x into the vec slvec0 */
918   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
921   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
922 
923   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926 
927   /* superdiagonal part */
928   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
929 
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
935 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
936 {
937   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
938   PetscErrorCode ierr;
939 
940   PetscFunctionBegin;
941   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   /* do diagonal part */
943   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
944   /* do superdiagonal part */
945   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
947 
948   /* do subdiagonal part */
949   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
950   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952 
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957   This only works correctly for square matrices where the subblock A->A is the
958    diagonal block
959 */
960 #undef __FUNCT__
961 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
962 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
963 {
964   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
969   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatScale_MPISBAIJ"
975 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
976 {
977   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
978   PetscErrorCode ierr;
979 
980   PetscFunctionBegin;
981   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
982   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatGetRow_MPISBAIJ"
988 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
989 {
990   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
991   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
992   PetscErrorCode ierr;
993   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
994   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
995   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
996 
997   PetscFunctionBegin;
998   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
999   mat->getrowactive = PETSC_TRUE;
1000 
1001   if (!mat->rowvalues && (idx || v)) {
1002     /*
1003         allocate enough space to hold information from the longest row.
1004     */
1005     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1006     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1007     PetscInt     max = 1,mbs = mat->mbs,tmp;
1008     for (i=0; i<mbs; i++) {
1009       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1010       if (max < tmp) { max = tmp; }
1011     }
1012     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1013   }
1014 
1015   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1016   lrow = row - brstart;  /* local row index */
1017 
1018   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1019   if (!v)   {pvA = 0; pvB = 0;}
1020   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1021   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1022   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1023   nztot = nzA + nzB;
1024 
1025   cmap  = mat->garray;
1026   if (v  || idx) {
1027     if (nztot) {
1028       /* Sort by increasing column numbers, assuming A and B already sorted */
1029       PetscInt imark = -1;
1030       if (v) {
1031         *v = v_p = mat->rowvalues;
1032         for (i=0; i<nzB; i++) {
1033           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1034           else break;
1035         }
1036         imark = i;
1037         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1038         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1039       }
1040       if (idx) {
1041         *idx = idx_p = mat->rowindices;
1042         if (imark > -1) {
1043           for (i=0; i<imark; i++) {
1044             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1045           }
1046         } else {
1047           for (i=0; i<nzB; i++) {
1048             if (cmap[cworkB[i]/bs] < cstart)
1049               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1050             else break;
1051           }
1052           imark = i;
1053         }
1054         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1055         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1056       }
1057     } else {
1058       if (idx) *idx = 0;
1059       if (v)   *v   = 0;
1060     }
1061   }
1062   *nz = nztot;
1063   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1064   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1070 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1071 {
1072   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1073 
1074   PetscFunctionBegin;
1075   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1076   baij->getrowactive = PETSC_FALSE;
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1082 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1083 {
1084   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1085   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1086 
1087   PetscFunctionBegin;
1088   aA->getrow_utriangular = PETSC_TRUE;
1089   PetscFunctionReturn(0);
1090 }
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1093 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1094 {
1095   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1096   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1097 
1098   PetscFunctionBegin;
1099   aA->getrow_utriangular = PETSC_FALSE;
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1105 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1106 {
1107   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1112   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1118 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1119 {
1120   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1125   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1131 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1132 {
1133   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1138   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1144 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1145 {
1146   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1147   Mat            A = a->A,B = a->B;
1148   PetscErrorCode ierr;
1149   PetscReal      isend[5],irecv[5];
1150 
1151   PetscFunctionBegin;
1152   info->block_size     = (PetscReal)matin->rmap->bs;
1153   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1154   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1155   isend[3] = info->memory;  isend[4] = info->mallocs;
1156   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1157   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1158   isend[3] += info->memory;  isend[4] += info->mallocs;
1159   if (flag == MAT_LOCAL) {
1160     info->nz_used      = isend[0];
1161     info->nz_allocated = isend[1];
1162     info->nz_unneeded  = isend[2];
1163     info->memory       = isend[3];
1164     info->mallocs      = isend[4];
1165   } else if (flag == MAT_GLOBAL_MAX) {
1166     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1167     info->nz_used      = irecv[0];
1168     info->nz_allocated = irecv[1];
1169     info->nz_unneeded  = irecv[2];
1170     info->memory       = irecv[3];
1171     info->mallocs      = irecv[4];
1172   } else if (flag == MAT_GLOBAL_SUM) {
1173     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else {
1180     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1181   }
1182   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1183   info->fill_ratio_needed = 0;
1184   info->factor_mallocs    = 0;
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1190 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1191 {
1192   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1193   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197   switch (op) {
1198   case MAT_NEW_NONZERO_LOCATIONS:
1199   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1200   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1201   case MAT_KEEP_NONZERO_PATTERN:
1202   case MAT_NEW_NONZERO_LOCATION_ERR:
1203     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1204     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1205     break;
1206   case MAT_ROW_ORIENTED:
1207     a->roworiented = flg;
1208     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1209     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1210     break;
1211   case MAT_NEW_DIAGONALS:
1212     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1213     break;
1214   case MAT_IGNORE_OFF_PROC_ENTRIES:
1215     a->donotstash = flg;
1216     break;
1217   case MAT_USE_HASH_TABLE:
1218     a->ht_flag = flg;
1219     break;
1220   case MAT_HERMITIAN:
1221     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1222     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1223     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1224     break;
1225   case MAT_SPD:
1226     A->spd_set                         = PETSC_TRUE;
1227     A->spd                             = flg;
1228     if (flg) {
1229       A->symmetric                     = PETSC_TRUE;
1230       A->structurally_symmetric        = PETSC_TRUE;
1231       A->symmetric_set                 = PETSC_TRUE;
1232       A->structurally_symmetric_set    = PETSC_TRUE;
1233     }
1234     break;
1235   case MAT_SYMMETRIC:
1236     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1237     break;
1238   case MAT_STRUCTURALLY_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1240     break;
1241   case MAT_SYMMETRY_ETERNAL:
1242     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1243     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1244     break;
1245   case MAT_IGNORE_LOWER_TRIANGULAR:
1246     aA->ignore_ltriangular = flg;
1247     break;
1248   case MAT_ERROR_LOWER_TRIANGULAR:
1249     aA->ignore_ltriangular = flg;
1250     break;
1251   case MAT_GETROW_UPPERTRIANGULAR:
1252     aA->getrow_utriangular = flg;
1253     break;
1254   default:
1255     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1262 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1263 {
1264   PetscErrorCode ierr;
1265   PetscFunctionBegin;
1266   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1267     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
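/*
   Symmetric diagonal scaling: with ll == rr == d the result D M D is again
   symmetric, so only this case is supported. The diagonal block is scaled on
   both sides; the off-diagonal block B is scaled by ll on the left and by the
   scattered ghost portion of rr on the right.
*/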
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1274 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1275 {
1276   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1277   Mat            a=baij->A, b=baij->B;
1278   PetscErrorCode ierr;
1279   PetscInt       nv,m,n;
1280   PetscBool      flg;
1281 
1282   PetscFunctionBegin;
1283   if (ll != rr){
1284     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1285     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1286   }
1287   if (!ll) PetscFunctionReturn(0);
1288 
1289   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1290   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);
1291 
1292   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1293   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1294 
1295   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1296 
1297   /* left diagonalscale the off-diagonal part */
1298   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1299 
1300   /* scale the diagonal part */
1301   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1302 
1303   /* right diagonalscale the off-diagonal part */
1304   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1311 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatEqual_MPISBAIJ"
1325 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1326 {
1327   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1328   Mat            a,b,c,d;
1329   PetscBool      flg;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   a = matA->A; b = matA->B;
1334   c = matB->A; d = matB->B;
1335 
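  /* compare the diagonal and off-diagonal blocks locally, then reduce with a logical AND so every rank agrees on the result */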
1336   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1337   if (flg) {
1338     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1339   }
1340   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatCopy_MPISBAIJ"
1346 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1347 {
1348   PetscErrorCode ierr;
1349   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1350   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1351 
1352   PetscFunctionBegin;
1353   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1354   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1355     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1356     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1357     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1358   } else {
1359     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1360     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1367 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1368 {
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1378 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1379 {
1380   PetscErrorCode ierr;
1381   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1382   PetscBLASInt   bnz,one=1;
1383   Mat_SeqSBAIJ   *xa,*ya;
1384   Mat_SeqBAIJ    *xb,*yb;
1385 
1386   PetscFunctionBegin;
1387   if (str == SAME_NONZERO_PATTERN) {
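    /* identical nonzero patterns: sum the raw value arrays of the diagonal (SeqSBAIJ) and off-diagonal (SeqBAIJ) blocks directly with BLAS axpy */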
1388     PetscScalar alpha = a;
1389     xa = (Mat_SeqSBAIJ *)xx->A->data;
1390     ya = (Mat_SeqSBAIJ *)yy->A->data;
1391     bnz = PetscBLASIntCast(xa->nz);
1392     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1393     xb = (Mat_SeqBAIJ *)xx->B->data;
1394     yb = (Mat_SeqBAIJ *)yy->B->data;
1395     bnz = PetscBLASIntCast(xb->nz);
1396     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1397   } else {
1398     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1399     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1400     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1407 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1408 {
1409   PetscErrorCode ierr;
1410   PetscInt       i;
1411   PetscBool      flg;
1412 
1413   PetscFunctionBegin;
1414   for (i=0; i<n; i++) {
1415     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1416     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrices for MPISBAIJ matrices");
1417   }
1418   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 
1423 /* -------------------------------------------------------------------*/
1424 static struct _MatOps MatOps_Values = {
1425        MatSetValues_MPISBAIJ,
1426        MatGetRow_MPISBAIJ,
1427        MatRestoreRow_MPISBAIJ,
1428        MatMult_MPISBAIJ,
1429 /* 4*/ MatMultAdd_MPISBAIJ,
1430        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1431        MatMultAdd_MPISBAIJ,
1432        0,
1433        0,
1434        0,
1435 /*10*/ 0,
1436        0,
1437        0,
1438        MatSOR_MPISBAIJ,
1439        MatTranspose_MPISBAIJ,
1440 /*15*/ MatGetInfo_MPISBAIJ,
1441        MatEqual_MPISBAIJ,
1442        MatGetDiagonal_MPISBAIJ,
1443        MatDiagonalScale_MPISBAIJ,
1444        MatNorm_MPISBAIJ,
1445 /*20*/ MatAssemblyBegin_MPISBAIJ,
1446        MatAssemblyEnd_MPISBAIJ,
1447        MatSetOption_MPISBAIJ,
1448        MatZeroEntries_MPISBAIJ,
1449 /*24*/ 0,
1450        0,
1451        0,
1452        0,
1453        0,
1454 /*29*/ MatSetUp_MPISBAIJ,
1455        0,
1456        0,
1457        0,
1458        0,
1459 /*34*/ MatDuplicate_MPISBAIJ,
1460        0,
1461        0,
1462        0,
1463        0,
1464 /*39*/ MatAXPY_MPISBAIJ,
1465        MatGetSubMatrices_MPISBAIJ,
1466        MatIncreaseOverlap_MPISBAIJ,
1467        MatGetValues_MPISBAIJ,
1468        MatCopy_MPISBAIJ,
1469 /*44*/ 0,
1470        MatScale_MPISBAIJ,
1471        0,
1472        0,
1473        0,
1474 /*49*/ 0,
1475        0,
1476        0,
1477        0,
1478        0,
1479 /*54*/ 0,
1480        0,
1481        MatSetUnfactored_MPISBAIJ,
1482        0,
1483        MatSetValuesBlocked_MPISBAIJ,
1484 /*59*/ 0,
1485        0,
1486        0,
1487        0,
1488        0,
1489 /*64*/ 0,
1490        0,
1491        0,
1492        0,
1493        0,
1494 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1495        0,
1496        0,
1497        0,
1498        0,
1499 /*74*/ 0,
1500        0,
1501        0,
1502        0,
1503        0,
1504 /*79*/ 0,
1505        0,
1506        0,
1507        0,
1508        MatLoad_MPISBAIJ,
1509 /*84*/ 0,
1510        0,
1511        0,
1512        0,
1513        0,
1514 /*89*/ 0,
1515        0,
1516        0,
1517        0,
1518        0,
1519 /*94*/ 0,
1520        0,
1521        0,
1522        0,
1523        0,
1524 /*99*/ 0,
1525        0,
1526        0,
1527        0,
1528        0,
1529 /*104*/0,
1530        MatRealPart_MPISBAIJ,
1531        MatImaginaryPart_MPISBAIJ,
1532        MatGetRowUpperTriangular_MPISBAIJ,
1533        MatRestoreRowUpperTriangular_MPISBAIJ,
1534 /*109*/0,
1535        0,
1536        0,
1537        0,
1538        0,
1539 /*114*/0,
1540        0,
1541        0,
1542        0,
1543        0,
1544 /*119*/0,
1545        0,
1546        0,
1547        0
1548 };
1549 
1550 
1551 EXTERN_C_BEGIN
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1554 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1555 {
1556   PetscFunctionBegin;
1557   *a = ((Mat_MPISBAIJ *)A->data)->A;
1558   PetscFunctionReturn(0);
1559 }
1560 EXTERN_C_END
1561 
1562 EXTERN_C_BEGIN
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1565 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1566 {
1567   Mat_MPISBAIJ   *b;
1568   PetscErrorCode ierr;
1569   PetscInt       i,mbs,Mbs;
1570 
1571   PetscFunctionBegin;
1572   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1573   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1574   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1575   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1576 
1577   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1578   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1579   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1580   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1581   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1582 
1583   if (d_nnz) {
1584     for (i=0; i<B->rmap->n/bs; i++) {
1585       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1586     }
1587   }
1588   if (o_nnz) {
1589     for (i=0; i<B->rmap->n/bs; i++) {
1590       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1591     }
1592   }
1593 
1594   b   = (Mat_MPISBAIJ*)B->data;
1595   mbs = B->rmap->n/bs;
1596   Mbs = B->rmap->N/bs;
1597   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1598 
1599   B->rmap->bs  = bs;
1600   b->bs2 = bs*bs;
1601   b->mbs = mbs;
1602   b->nbs = mbs;
1603   b->Mbs = Mbs;
1604   b->Nbs = Mbs;
1605 
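  /* convert the element-based ownership ranges into block-row ranges */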
1606   for (i=0; i<=b->size; i++) {
1607     b->rangebs[i] = B->rmap->range[i]/bs;
1608   }
1609   b->rstartbs = B->rmap->rstart/bs;
1610   b->rendbs   = B->rmap->rend/bs;
1611 
1612   b->cstartbs = B->cmap->rstart/bs;
1613   b->cendbs   = B->cmap->rend/bs;
1614 
1615   if (!B->preallocated) {
1616     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1617     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1618     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1619     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1620     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1621     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1622     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1623     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1624     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1625   }
1626 
1627   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1628   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1629   B->preallocated = PETSC_TRUE;
1630   PetscFunctionReturn(0);
1631 }
1632 EXTERN_C_END
1633 
1634 EXTERN_C_BEGIN
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1637 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1638 {
1639   PetscInt       m,rstart,cstart,cend;
1640   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1641   const PetscInt *JJ=0;
1642   PetscScalar    *values=0;
1643   PetscErrorCode ierr;
1644 
1645   PetscFunctionBegin;
1646 
1647   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1648   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1649   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1650   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1651   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1652   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1653   m      = B->rmap->n/bs;
1654   rstart = B->rmap->rstart/bs;
1655   cstart = B->cmap->rstart/bs;
1656   cend   = B->cmap->rend/bs;
1657 
1658   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1659   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
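  /* count, for each local block row, how many block columns fall inside [cstart,cend) (diagonal block) and how many fall outside (off-diagonal block); the scan below assumes the column indices within each row are sorted */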
1660   for (i=0; i<m; i++) {
1661     nz = ii[i+1] - ii[i];
1662     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1663     nz_max = PetscMax(nz_max,nz);
1664     JJ  = jj + ii[i];
1665     for (j=0; j<nz; j++) {
1666       if (*JJ >= cstart) break;
1667       JJ++;
1668     }
1669     d = 0;
1670     for (; j<nz; j++) {
1671       if (*JJ++ >= cend) break;
1672       d++;
1673     }
1674     d_nnz[i] = d;
1675     o_nnz[i] = nz - d;
1676   }
1677   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1678   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1679 
1680   values = (PetscScalar*)V;
1681   if (!values) {
1682     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1683     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1684   }
1685   for (i=0; i<m; i++) {
1686     PetscInt          row    = i + rstart;
1687     PetscInt          ncols  = ii[i+1] - ii[i];
1688     const PetscInt    *icols = jj + ii[i];
1689     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1690     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1691   }
1692 
1693   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1694   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1696   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 EXTERN_C_END
1700 
1701 EXTERN_C_BEGIN
1702 #if defined(PETSC_HAVE_MUMPS)
1703 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1704 #endif
1705 #if defined(PETSC_HAVE_PASTIX)
1706 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1707 #endif
1708 EXTERN_C_END
1709 
1710 /*MC
1711    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1712    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1713    the matrix is stored.
1714 
1715   For complex numbers, by default this matrix is symmetric, NOT Hermitian. To make it Hermitian,
1716   call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE).
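
  A typical usage sketch (the sizes, block size, and preallocation counts are
  illustrative placeholders only):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve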
1717 
1718    Options Database Keys:
1719 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1720 
1721   Level: beginner
1722 
1723 .seealso: MatCreateMPISBAIJ
1724 M*/
1725 
1726 EXTERN_C_BEGIN
1727 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*);
1728 EXTERN_C_END
1729 
1730 EXTERN_C_BEGIN
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatCreate_MPISBAIJ"
1733 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1734 {
1735   Mat_MPISBAIJ   *b;
1736   PetscErrorCode ierr;
1737   PetscBool      flg;
1738 
1739   PetscFunctionBegin;
1740 
1741   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1742   B->data = (void*)b;
1743   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1744 
1745   B->ops->destroy    = MatDestroy_MPISBAIJ;
1746   B->ops->view       = MatView_MPISBAIJ;
1747   B->assembled       = PETSC_FALSE;
1748 
1749   B->insertmode = NOT_SET_VALUES;
1750   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1751   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1752 
1753   /* build local table of row and column ownerships */
1754   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1755 
1756   /* build cache for off array entries formed */
1757   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1758   b->donotstash  = PETSC_FALSE;
1759   b->colmap      = PETSC_NULL;
1760   b->garray      = PETSC_NULL;
1761   b->roworiented = PETSC_TRUE;
1762 
1763   /* stuff used in block assembly */
1764   b->barray       = 0;
1765 
1766   /* stuff used for matrix vector multiply */
1767   b->lvec         = 0;
1768   b->Mvctx        = 0;
1769   b->slvec0       = 0;
1770   b->slvec0b      = 0;
1771   b->slvec1       = 0;
1772   b->slvec1a      = 0;
1773   b->slvec1b      = 0;
1774   b->sMvctx       = 0;
1775 
1776   /* stuff for MatGetRow() */
1777   b->rowindices   = 0;
1778   b->rowvalues    = 0;
1779   b->getrowactive = PETSC_FALSE;
1780 
1781   /* hash table stuff */
1782   b->ht           = 0;
1783   b->hd           = 0;
1784   b->ht_size      = 0;
1785   b->ht_flag      = PETSC_FALSE;
1786   b->ht_fact      = 0;
1787   b->ht_total_ct  = 0;
1788   b->ht_insert_ct = 0;
1789 
1790   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1791   b->ijonly       = PETSC_FALSE;
1792 
1793   b->in_loc       = 0;
1794   b->v_loc        = 0;
1795   b->n_loc        = 0;
1796   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1797     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1798     if (flg) {
1799       PetscReal fact = 1.39;
1800       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1801       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1802       if (fact <= 1.0) fact = 1.39;
1803       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1804       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1805     }
1806   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1807 
1808 #if defined(PETSC_HAVE_PASTIX)
1809   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1810 					   "MatGetFactor_mpisbaij_pastix",
1811 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1812 #endif
1813 #if defined(PETSC_HAVE_MUMPS)
1814   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1815                                      "MatGetFactor_sbaij_mumps",
1816                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1817 #endif
1818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1819                                      "MatStoreValues_MPISBAIJ",
1820                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1822                                      "MatRetrieveValues_MPISBAIJ",
1823                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1825                                      "MatGetDiagonalBlock_MPISBAIJ",
1826                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1828                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1829                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1830   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1831                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1832                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1833   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1834                                      "MatConvert_MPISBAIJ_MPISBSTRM",
1835                                       MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1836 
1837   B->symmetric                  = PETSC_TRUE;
1838   B->structurally_symmetric     = PETSC_TRUE;
1839   B->symmetric_set              = PETSC_TRUE;
1840   B->structurally_symmetric_set = PETSC_TRUE;
1841   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 EXTERN_C_END
1845 
1846 /*MC
1847    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1848 
1849    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1850    and MATMPISBAIJ otherwise.
1851 
1852    Options Database Keys:
1853 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1854 
1855   Level: beginner
1856 
1857 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1858 M*/
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1862 /*@C
1863    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1864    the user should preallocate the matrix storage by setting the parameters
1865    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1866    performance can be increased by more than a factor of 50.
1867 
1868    Collective on Mat
1869 
1870    Input Parameters:
1871 +  A - the matrix
1872 .  bs   - size of block
1873 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1874            submatrix  (same for all local rows)
1875 .  d_nnz - array containing the number of block nonzeros in the various block rows
1876            in the upper triangular and diagonal parts of the diagonal portion of the local submatrix
1877            (possibly different for each block row) or PETSC_NULL.  If you plan to factor the matrix you must leave room
1878            for the diagonal entry and set a value even if it is zero.
1879 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1880            submatrix (same for all local rows).
1881 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1882            off-diagonal portion of the local submatrix that is right of the diagonal
1883            (possibly different for each block row) or PETSC_NULL.
1884 
1885 
1886    Options Database Keys:
1887 +   -mat_no_unroll - uses code that does not unroll the loops in the
1888                      block calculations (much slower)
1889 -   -mat_block_size - size of the blocks to use
1890 
1891    Notes:
1892 
1893    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1894    then it must be used on all processors that share the object for that argument.
1895 
1896    If the *_nnz parameter is given then the *_nz parameter is ignored.
1897 
1898    Storage Information:
1899    For a square global matrix we define each processor's diagonal portion
1900    to be its local rows and the corresponding columns (a square submatrix);
1901    each processor's off-diagonal portion encompasses the remainder of the
1902    local matrix (a rectangular submatrix).
1903 
1904    The user can specify preallocated storage for the diagonal part of
1905    the local submatrix with either d_nz or d_nnz (not both).  Set
1906    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1907    memory allocation.  Likewise, specify preallocated storage for the
1908    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1909 
1910    You can call MatGetInfo() to get information on how effective the preallocation was;
1911    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded;
1912    You can also run with the option -info and look for messages with the string
1913    malloc in them to see if additional memory allocation was needed.
1914 
1915    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1916    the figure below we depict these three local rows and all columns (0-11).
1917 
1918 .vb
1919            0 1 2 3 4 5 6 7 8 9 10 11
1920           -------------------
1921    row 3  |  . . . d d d o o o o o o
1922    row 4  |  . . . d d d o o o o o o
1923    row 5  |  . . . d d d o o o o o o
1924           -------------------
1925 .ve
1926 
1927    Thus, any entries in the d locations are stored in the d (diagonal)
1928    submatrix, and any entries in the o locations are stored in the
1929    o (off-diagonal) submatrix.  Note that the d submatrix is stored in
1930    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1931 
1932    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1933    plus the diagonal part of the d matrix,
1934    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1935 
1936    In general, for PDE problems in which most nonzeros are near the diagonal,
1937    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1938    or you will get TERRIBLE performance; see the users' manual chapter on
1939    matrices.
1940 
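   A minimal call sketch (the block size and counts below are illustrative only):
.vb
   /* block size 2; 3 blocks per block row in the upper triangular/diagonal
      part of the diagonal portion, 1 block per block row off-process */
   ierr = MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve
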
1941    Level: intermediate
1942 
1943 .keywords: matrix, block, aij, compressed row, sparse, parallel
1944 
1945 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
1946 @*/
1947 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1948 {
1949   PetscErrorCode ierr;
1950 
1951   PetscFunctionBegin;
1952   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1953   PetscValidType(B,1);
1954   PetscValidLogicalCollectiveInt(B,bs,2);
1955   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #undef __FUNCT__
1960 #define __FUNCT__ "MatCreateSBAIJ"
1961 /*@C
1962    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1963    (block compressed row).  For good matrix assembly performance
1964    the user should preallocate the matrix storage by setting the parameters
1965    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1966    performance can be increased by more than a factor of 50.
1967 
1968    Collective on MPI_Comm
1969 
1970    Input Parameters:
1971 +  comm - MPI communicator
1972 .  bs   - size of block
1973 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1974            This value should be the same as the local size used in creating the
1975            y vector for the matrix-vector product y = Ax.
1976 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1977            This value should be the same as the local size used in creating the
1978            x vector for the matrix-vector product y = Ax.
1979 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1980 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1981 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1982            submatrix  (same for all local rows)
1983 .  d_nnz - array containing the number of block nonzeros in the various block rows
1984            in the upper triangular and diagonal parts of the diagonal portion of the local submatrix
1985            (possibly different for each block row) or PETSC_NULL.
1986            If you plan to factor the matrix you must leave room for the diagonal entry and
1987            set its value even if it is zero.
1988 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1989            submatrix (same for all local rows).
1990 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1991            off-diagonal portion of the local submatrix (possibly different for
1992            each block row) or PETSC_NULL.
1993 
1994    Output Parameter:
1995 .  A - the matrix
1996 
1997    Options Database Keys:
1998 +   -mat_no_unroll - uses code that does not unroll the loops in the
1999                      block calculations (much slower)
2000 .   -mat_block_size - size of the blocks to use
2001 -   -mat_mpi - use the parallel matrix data structures even on one processor
2002                (defaults to using SeqBAIJ format on one processor)
2003 
2004    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2005    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2006    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2007 
2008    Notes:
2009    The number of rows and columns must be divisible by blocksize.
2010    This matrix type does not support complex Hermitian operations.
2011 
2012    The user MUST specify either the local or global matrix dimensions
2013    (possibly both).
2014 
2015    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2016    then it must be used on all processors that share the object for that argument.
2017 
2018    If the *_nnz parameter is given then the *_nz parameter is ignored.
2019 
2020    Storage Information:
2021    For a square global matrix we define each processor's diagonal portion
2022    to be its local rows and the corresponding columns (a square submatrix);
2023    each processor's off-diagonal portion encompasses the remainder of the
2024    local matrix (a rectangular submatrix).
2025 
2026    The user can specify preallocated storage for the diagonal part of
2027    the local submatrix with either d_nz or d_nnz (not both).  Set
2028    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2029    memory allocation.  Likewise, specify preallocated storage for the
2030    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2031 
2032    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2033    the figure below we depict these three local rows and all columns (0-11).
2034 
2035 .vb
2036            0 1 2 3 4 5 6 7 8 9 10 11
2037           -------------------
2038    row 3  |  . . . d d d o o o o o o
2039    row 4  |  . . . d d d o o o o o o
2040    row 5  |  . . . d d d o o o o o o
2041           -------------------
2042 .ve
2043 
2044    Thus, any entries in the d locations are stored in the d (diagonal)
2045    submatrix, and any entries in the o locations are stored in the
2046    o (off-diagonal) submatrix.  Note that the d submatrix is stored in
2047    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2048 
2049    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2050    plus the diagonal part of the d matrix,
2051    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2052    In general, for PDE problems in which most nonzeros are near the diagonal,
2053    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2054    or you will get TERRIBLE performance; see the users' manual chapter on
2055    matrices.
2056 
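   A usage sketch (the communicator, global size, and preallocation values
   below are illustrative only):
.vb
   Mat A;
   ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
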
2057    Level: intermediate
2058 
2059 .keywords: matrix, block, aij, compressed row, sparse, parallel
2060 
2061 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2062 @*/
2063 
2064 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2065 {
2066   PetscErrorCode ierr;
2067   PetscMPIInt    size;
2068 
2069   PetscFunctionBegin;
2070   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2071   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2072   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2073   if (size > 1) {
2074     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2075     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2076   } else {
2077     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2078     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2079   }
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2086 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2087 {
2088   Mat            mat;
2089   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2090   PetscErrorCode ierr;
2091   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2092   PetscScalar    *array;
2093 
2094   PetscFunctionBegin;
2095   *newmat       = 0;
2096   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2097   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2098   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2099   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2100   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2101   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2102 
2103   mat->factortype   = matin->factortype;
2104   mat->preallocated = PETSC_TRUE;
2105   mat->assembled    = PETSC_TRUE;
2106   mat->insertmode   = NOT_SET_VALUES;
2107 
2108   a = (Mat_MPISBAIJ*)mat->data;
2109   a->bs2   = oldmat->bs2;
2110   a->mbs   = oldmat->mbs;
2111   a->nbs   = oldmat->nbs;
2112   a->Mbs   = oldmat->Mbs;
2113   a->Nbs   = oldmat->Nbs;
2114 
2115 
2116   a->size         = oldmat->size;
2117   a->rank         = oldmat->rank;
2118   a->donotstash   = oldmat->donotstash;
2119   a->roworiented  = oldmat->roworiented;
2120   a->rowindices   = 0;
2121   a->rowvalues    = 0;
2122   a->getrowactive = PETSC_FALSE;
2123   a->barray       = 0;
2124   a->rstartbs    = oldmat->rstartbs;
2125   a->rendbs      = oldmat->rendbs;
2126   a->cstartbs    = oldmat->cstartbs;
2127   a->cendbs      = oldmat->cendbs;
2128 
2129   /* hash table stuff */
2130   a->ht           = 0;
2131   a->hd           = 0;
2132   a->ht_size      = 0;
2133   a->ht_flag      = oldmat->ht_flag;
2134   a->ht_fact      = oldmat->ht_fact;
2135   a->ht_total_ct  = 0;
2136   a->ht_insert_ct = 0;
2137 
2138   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2139   if (oldmat->colmap) {
2140 #if defined (PETSC_USE_CTABLE)
2141     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2142 #else
2143     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2144     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2145     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2146 #endif
2147   } else a->colmap = 0;
2148 
2149   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2150     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2151     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2152     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2153   } else a->garray = 0;
2154 
2155   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2156   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2158   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2159   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2160 
2161   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2162   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2163   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2164   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2165 
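  /* slvec1a/slvec1b are views of the local and ghost segments of slvec1's array, and slvec0b of slvec0's ghost segment; recreate the views rather than copy them */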
2166   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2167   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2168   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2169   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2170   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2171   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2172   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2173   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2174   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2175   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2176   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2177   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2178   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2179 
2180   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2181   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2182   a->sMvctx = oldmat->sMvctx;
2183   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2184 
2185   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2186   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2187   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2188   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2189   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2190   *newmat = mat;
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatLoad_MPISBAIJ"
2196 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2197 {
2198   PetscErrorCode ierr;
2199   PetscInt       i,nz,j,rstart,rend;
2200   PetscScalar    *vals,*buf;
2201   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2202   MPI_Status     status;
2203   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2204   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2205   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2206   PetscInt       bs=1,Mbs,mbs,extra_rows;
2207   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2208   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2209   int            fd;
2210 
2211   PetscFunctionBegin;
2212   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2213     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2214   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2215 
2216   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2217   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2218   if (!rank) {
2219     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2220     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2221     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2222     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2223   }
2224 
2225   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2226 
2227   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2228   M = header[1]; N = header[2];
2229 
2230   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2231   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2232   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2233 
2234   /* If global sizes are set, check if they are consistent with that given in the file */
2235   if (sizesset) {
2236     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2237   }
2238   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2239   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2240 
2241   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2242 
2243   /*
2244      This code adds extra rows to make sure the number of rows is
2245      divisible by the blocksize
2246   */
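  /* for example, M = 10 with bs = 4 gives Mbs = 2 and extra_rows = 4 - 10 + 8 = 2, so Mbs becomes 3 and two padding rows (unit diagonal) are appended */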
2247   Mbs        = M/bs;
2248   extra_rows = bs - M + bs*(Mbs);
2249   if (extra_rows == bs) extra_rows = 0;
2250   else                  Mbs++;
2251   if (extra_rows && !rank) {
2252     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2253   }
2254 
2255   /* determine ownership of all rows */
2256   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2257     mbs        = Mbs/size + ((Mbs % size) > rank);
2258     m          = mbs*bs;
2259   } else { /* User Set */
2260     m          = newmat->rmap->n;
2261     mbs        = m/bs;
2262   }
2263   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2264   mmbs       = PetscMPIIntCast(mbs);
2265   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2266   rowners[0] = 0;
2267   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2268   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2269   rstart = rowners[rank];
2270   rend   = rowners[rank+1];
2271 
2272   /* distribute row lengths to all processors */
2273   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2274   if (!rank) {
2275     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2276     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2277     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2278     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2279     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2280     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2281     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2282   } else {
2283     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2284   }
2285 
2286   if (!rank) {   /* procs[0] */
2287     /* calculate the number of nonzeros on each processor */
2288     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2289     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2290     for (i=0; i<size; i++) {
2291       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2292         procsnz[i] += rowlengths[j];
2293       }
2294     }
2295     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2296 
2297     /* determine max buffer needed and allocate it */
2298     maxnz = 0;
2299     for (i=0; i<size; i++) {
2300       maxnz = PetscMax(maxnz,procsnz[i]);
2301     }
2302     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2303 
2304     /* read in my part of the matrix column indices  */
2305     nz     = procsnz[0];
2306     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2307     mycols = ibuf;
2308     if (size == 1)  nz -= extra_rows;
2309     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2310     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2311 
2312     /* read in the parts for every other proc (except the last) and ship them off */
2313     for (i=1; i<size-1; i++) {
2314       nz   = procsnz[i];
2315       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2316       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2317     }
2318     /* read in the stuff for the last proc */
2319     if (size != 1) {
2320       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2321       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2322       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2323       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2324     }
2325     ierr = PetscFree(cols);CHKERRQ(ierr);
2326   } else {  /* procs[i], i>0 */
2327     /* determine buffer space needed for message */
2328     nz = 0;
2329     for (i=0; i<m; i++) {
2330       nz += locrowlens[i];
2331     }
2332     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2333     mycols = ibuf;
2334     /* receive message of column indices */
2335     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2336     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2337     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2338   }
2339 
2340   /* loop over local rows, determining number of off diagonal entries */
2341   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2342   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2343   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2344   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2345   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
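  /* mask[] flags the block columns already seen in the current block row; masked1[]/masked2[] record which flags to clear afterwards */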
2346   rowcount = 0;
2347   nzcount  = 0;
2348   for (i=0; i<mbs; i++) {
2349     dcount  = 0;
2350     odcount = 0;
2351     for (j=0; j<bs; j++) {
2352       kmax = locrowlens[rowcount];
2353       for (k=0; k<kmax; k++) {
2354         tmp = mycols[nzcount++]/bs; /* block col. index */
2355         if (!mask[tmp]) {
2356           mask[tmp] = 1;
2357           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2358           else masked1[dcount++] = tmp; /* entry in diag portion */
2359         }
2360       }
2361       rowcount++;
2362     }
2363 
2364     dlens[i]  = dcount;  /* d_nzz[i] */
2365     odlens[i] = odcount; /* o_nzz[i] */
2366 
2367     /* zero out the mask elements we set */
2368     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2369     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2370   }
2371   if (!sizesset) {
2372     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2373   }
2374   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2375   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2376 
2377   if (!rank) {
2378     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2379     /* read in my part of the matrix numerical values  */
2380     nz = procsnz[0];
2381     vals = buf;
2382     mycols = ibuf;
2383     if (size == 1)  nz -= extra_rows;
2384     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2385     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2386 
2387     /* insert into matrix */
2388     jj      = rstart*bs;
2389     for (i=0; i<m; i++) {
2390       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2391       mycols += locrowlens[i];
2392       vals   += locrowlens[i];
2393       jj++;
2394     }
2395 
2396     /* read in other processors (except the last one) and ship out */
2397     for (i=1; i<size-1; i++) {
2398       nz   = procsnz[i];
2399       vals = buf;
2400       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2401       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2402     }
2403     /* the last proc */
2404     if (size != 1){
2405       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2406       vals = buf;
2407       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2408       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2409       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2410     }
2411     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2412 
2413   } else {
2414     /* receive numeric values */
2415     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2416 
2417     /* receive message of values */
2418     vals   = buf;
2419     mycols = ibuf;
2420     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2421     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2422     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2423 
2424     /* insert into matrix */
2425     jj      = rstart*bs;
2426     for (i=0; i<m; i++) {
2427       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2428       mycols += locrowlens[i];
2429       vals   += locrowlens[i];
2430       jj++;
2431     }
2432   }
2433 
2434   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2435   ierr = PetscFree(buf);CHKERRQ(ierr);
2436   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2437   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2438   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2439   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2440   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2441   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 #undef __FUNCT__
2446 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2447 /*XXXXX@
2448    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2449 
2450    Input Parameters:
2451 .  mat  - the matrix
2452 .  fact - factor
2453 
2454    Not Collective on Mat, each process can have a different hash factor
2455 
2456    Level: advanced
2457 
2458   Notes:
2459    This can also be set by the command line option: -mat_use_hash_table fact
2460 
2461 .keywords: matrix, hashtable, factor, HT
2462 
2463 .seealso: MatSetOption()
2464 @XXXXX*/
2465 
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2469 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2470 {
2471   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2472   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2473   PetscReal      atmp;
2474   PetscReal      *work,*svalues,*rvalues;
2475   PetscErrorCode ierr;
2476   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2477   PetscMPIInt    rank,size;
2478   PetscInt       *rowners_bs,dest,count,source;
2479   PetscScalar    *va;
2480   MatScalar      *ba;
2481   MPI_Status     stat;
2482 
2483   PetscFunctionBegin;
2484   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2485   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2486   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2487 
2488   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2489   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2490 
2491   bs   = A->rmap->bs;
2492   mbs  = a->mbs;
2493   Mbs  = a->Mbs;
2494   ba   = b->a;
2495   bi   = b->i;
2496   bj   = b->j;
2497 
2498   /* find ownerships */
2499   rowners_bs = A->rmap->range;
2500 
2501   /* each proc creates an array to be distributed */
2502   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2503   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2504 
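  /* only the upper triangle is stored, so each entry of B also contributes to
     the row maximum of the process that owns the corresponding column; those
     contributions are accumulated in work[] and shipped to higher ranks below */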
2505   /* row_max for B */
2506   if (rank != size-1){
2507     for (i=0; i<mbs; i++) {
2508       ncols = bi[1] - bi[0]; bi++;
2509       brow  = bs*i;
2510       for (j=0; j<ncols; j++){
2511         bcol = bs*(*bj);
2512         for (kcol=0; kcol<bs; kcol++){
2513           col = bcol + kcol;                 /* local col index */
2514           col += rowners_bs[rank+1];      /* global col index */
2515           for (krow=0; krow<bs; krow++){
2516             atmp = PetscAbsScalar(*ba); ba++;
2517             row = brow + krow;    /* local row index */
2518             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2519             if (work[col] < atmp) work[col] = atmp;
2520           }
2521         }
2522         bj++;
2523       }
2524     }
2525 
2526     /* send values to its owners */
2527     for (dest=rank+1; dest<size; dest++){
2528       svalues = work + rowners_bs[dest];
2529       count   = rowners_bs[dest+1]-rowners_bs[dest];
2530       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2531     }
2532   }
2533 
2534   /* receive values */
2535   if (rank){
2536     rvalues = work;
2537     count   = rowners_bs[rank+1]-rowners_bs[rank];
2538     for (source=0; source<rank; source++){
2539       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2540       /* process values */
2541       for (i=0; i<count; i++){
2542         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2543       }
2544     }
2545   }
2546 
2547   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2548   ierr = PetscFree(work);CHKERRQ(ierr);
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 #undef __FUNCT__
2553 #define __FUNCT__ "MatSOR_MPISBAIJ"
2554 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2555 {
2556   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2557   PetscErrorCode    ierr;
2558   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2559   PetscScalar       *x,*ptr,*from;
2560   Vec               bb1;
2561   const PetscScalar *b;
2562 
2563   PetscFunctionBegin;
2564   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2565   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2566 
2567   if (flag == SOR_APPLY_UPPER) {
2568     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2569     PetscFunctionReturn(0);
2570   }
2571 
2572   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2573     if (flag & SOR_ZERO_INITIAL_GUESS) {
2574       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2575       its--;
2576     }
2577 
2578     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2579     while (its--){
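      /* each sweep assembles bb1 = bb - (off-diagonal part of the matrix)*xx:
         the strictly lower triangle is applied as B^T through the scatter
         (slvec0b) and the strictly upper triangle directly as B (slvec1b),
         then a local symmetric sweep is done with the diagonal block */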
2580 
2581       /* lower triangular part: slvec0b = - B^T*xx */
2582       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2583 
2584       /* copy xx into slvec0a */
2585       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2586       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2587       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2588       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2589 
2590       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2591 
2592       /* copy bb into slvec1a */
2593       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2594       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2595       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2596       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2597 
2598       /* set slvec1b = 0 */
2599       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2600 
2601       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2602       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2603       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2604       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2605 
2606       /* upper triangular part: bb1 = bb1 - B*x */
2607       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2608 
2609       /* local diagonal sweep */
2610       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2611     }
2612     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2613   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2614     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2615   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2616     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2617   } else if (flag & SOR_EISENSTAT) {
2618     Vec               xx1;
2619     PetscBool         hasop;
2620     const PetscScalar *diag;
2621     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2622     PetscInt          i,n;
2623 
2624     if (!mat->xx1) {
2625       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2626       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2627     }
2628     xx1 = mat->xx1;
2629     bb1 = mat->bb1;
2630 
2631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2632 
2633     if (!mat->diag) {
2634       /* this is wrong if the same matrix is reused with new nonzero values */
2635       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2636       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2637     }
2638     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2639 
2640     if (hasop) {
2641       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2642       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2643     } else {
2644       /*
2645           These two lines are replaced by code that may be a bit faster for a good compiler
2646       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2647       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2648       */
2649       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2650       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2651       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2652       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2653       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2654       if (omega == 1.0) {
2655         for (i=0; i<n; i++) {
2656           sl[i] = b[i] - diag[i]*x[i];
2657         }
2658         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2659       } else {
2660         for (i=0; i<n; i++) {
2661           sl[i] = b[i] + scale*diag[i]*x[i];
2662         }
2663         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2664       }
2665       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2666       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2667       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2668       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2669     }
2670 
2671     /* multiply off-diagonal portion of matrix */
2672     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2673     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2674     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2675     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2676     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2677     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2678     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2679     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2680     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2681     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2682 
2683     /* local sweep */
2684     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2685     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2686   } else {
2687     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2688   }
2689   PetscFunctionReturn(0);
2690 }
2691 
2692 #undef __FUNCT__
2693 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2694 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2695 {
2696   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2697   PetscErrorCode ierr;
2698   Vec            lvec1,bb1;
2699 
2700   PetscFunctionBegin;
2701   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2702   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2703 
2704   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2705     if (flag & SOR_ZERO_INITIAL_GUESS) {
2706       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2707       its--;
2708     }
2709 
2710     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2711     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2712     while (its--){
2713       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2714 
2715       /* lower triangular part: bb1 = bb - B^T*xx */
2716       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2717       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2718 
2719       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
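      /* reverse-scatter the -B^T*xx products in lvec1 back into the bb1
         entries of the processes that own the corresponding rows */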
2721       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2722 
2723       /* upper triangular part: bb1 = bb1 - B*xx */
2724       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2725       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2726 
2727       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2728 
2729       /* diagonal sweep */
2730       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2731     }
2732     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2733     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2734   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2735   PetscFunctionReturn(0);
2736 }
2737 
2738 #undef __FUNCT__
2739 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2740 /*@
2741      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
2742          rows in standard CSR format.
2743 
2744    Collective on MPI_Comm
2745 
2746    Input Parameters:
2747 +  comm - MPI communicator
2748 .  bs - the block size (only a block size of 1 is supported)
2749 .  m - number of local rows (cannot be PETSC_DECIDE)
2750 .  n - number of local columns; this must match the local size used in creating the
2751        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2752        calculated if N is given). For square matrices n is almost always m.
2753 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2754 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2755 .  i - row indices (of length m+1; i[0] must be 0)
2756 .  j - column indices
2757 -  a - matrix values
2758 
2759    Output Parameter:
2760 .   mat - the matrix
2761 
2762    Level: intermediate
2763 
2764    Notes:
2765        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2766      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2767      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2768 
2769        The i and j indices are 0 based; for each local row, i gives the offset of that row's entries in the local j (and a) arrays.
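
   Example:
      For bs = 1 and the symmetric matrix below, the expected input is sketched
      here (assuming, as for the other SBAIJ routines, that only the diagonal
      and upper triangular entries of each local row are supplied):

         1 2 0
         2 3 4     P0
        -------
         0 4 5     P1

      Process0 [P0]: rows_owned=[0,1]
         i =  {0,2,4}    [size = nrow+1 = 2+1]
         j =  {0,1,1,2}  [upper triangle of rows 0 and 1]
         a =  {1,2,3,4}

      Process1 [P1]: rows_owned=[2]
         i =  {0,1}      [size = nrow+1 = 1+1]
         j =  {2}
         a =  {5}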
2770 
2771 .keywords: matrix, sbaij, compressed row, sparse, parallel, symmetric
2772 
2773 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2774           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2775 @*/
2776 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2777 {
2778   PetscErrorCode ierr;
2779 
2780 
2781   PetscFunctionBegin;
2782   if (i[0]) {
2783     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2784   }
2785   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
2786   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2787   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2788   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2789   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2796 /*@C
2797    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format,
2798    which stores only the upper triangular portion of each block row.
2799 
2800    Collective on MPI_Comm
2801 
2802    Input Parameters:
2803 +  B - the matrix
2804 .  bs - the block size
2805 .  i - the indices into j for the start of each local row (starts with zero)
2806 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2807 -  v - optional values in the matrix
2808 
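   Notes:
     A minimal calling sketch (hypothetical comm, sizes, and CSR arrays; this
     mirrors the sequence used internally by MatCreateMPISBAIJWithArrays()):

.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocationCSR(A,bs,i,j,v);CHKERRQ(ierr);
.ve
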
2809    Level: developer
2810 
2811 .keywords: matrix, sbaij, compressed row, sparse, parallel, symmetric
2812 
2813 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatMPISBAIJSetPreallocation(), MatCreateAIJ(), MPISBAIJ
2814 @*/
2815 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2816 {
2817   PetscErrorCode ierr;
2818 
2819   PetscFunctionBegin;
2820   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2821   PetscFunctionReturn(0);
2822 }
2823 
2824 
2825