xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 4e6ef68f07cf4e7ffee1b93dbc438d90283d533b)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
25 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
32   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNCT__
37 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
38 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
39 {
40   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
41   PetscErrorCode ierr;
42 
43   PetscFunctionBegin;
44   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
45   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
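/*
   Illustrative sketch (not part of this implementation) of the intended calling sequence for
   MatStoreValues()/MatRetrieveValues() from application code: freeze the nonzero pattern,
   save the numerical values, then restore them later (for example inside a nonlinear loop).

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ...  modify the numerical values of mat  ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);     the stored values are restored here
*/
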
49 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
50   { \
51  \
52     brow = row/bs;  \
53     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
54     rmax = aimax[brow]; nrow = ailen[brow]; \
55     bcol = col/bs; \
56     ridx = row % bs; cidx = col % bs; \
57     low  = 0; high = nrow; \
58     while (high-low > 3) { \
59       t = (low+high)/2; \
60       if (rp[t] > bcol) high = t; \
61       else              low  = t; \
62     } \
63     for (_i=low; _i<high; _i++) { \
64       if (rp[_i] > bcol) break; \
65       if (rp[_i] == bcol) { \
66         bap = ap + bs2*_i + bs*cidx + ridx; \
67         if (addv == ADD_VALUES) *bap += value;  \
68         else                    *bap  = value;  \
69         goto a_noinsert; \
70       } \
71     } \
72     if (a->nonew == 1) goto a_noinsert; \
73     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
74     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
75     N = nrow++ - 1;  \
76     /* shift up all the later entries in this row */ \
77     for (ii=N; ii>=_i; ii--) { \
78       rp[ii+1] = rp[ii]; \
79       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
80     } \
81     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
82     rp[_i]                      = bcol;  \
83     ap[bs2*_i + bs*cidx + ridx] = value;  \
84     A->nonzerostate++;\
85 a_noinsert:; \
86     ailen[brow] = nrow; \
87   }
88 
89 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
90   { \
91     brow = row/bs;  \
92     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
93     rmax = bimax[brow]; nrow = bilen[brow]; \
94     bcol = col/bs; \
95     ridx = row % bs; cidx = col % bs; \
96     low  = 0; high = nrow; \
97     while (high-low > 3) { \
98       t = (low+high)/2; \
99       if (rp[t] > bcol) high = t; \
100       else              low  = t; \
101     } \
102     for (_i=low; _i<high; _i++) { \
103       if (rp[_i] > bcol) break; \
104       if (rp[_i] == bcol) { \
105         bap = ap + bs2*_i + bs*cidx + ridx; \
106         if (addv == ADD_VALUES) *bap += value;  \
107         else                    *bap  = value;  \
108         goto b_noinsert; \
109       } \
110     } \
111     if (b->nonew == 1) goto b_noinsert; \
112     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
113     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
114     N = nrow++ - 1;  \
115     /* shift up all the later entries in this row */ \
116     for (ii=N; ii>=_i; ii--) { \
117       rp[ii+1] = rp[ii]; \
118       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
119     } \
120     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
121     rp[_i]                      = bcol;  \
122     ap[bs2*_i + bs*cidx + ridx] = value;  \
123     B->nonzerostate++;\
124 b_noinsert:; \
125     bilen[brow] = nrow; \
126   }
127 
128 /* Only add/insert a(i,j) with i<=j (blocks).
129    Any a(i,j) with i>j input by the user is ignored.
130 */
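
/*
   Illustrative sketch (i, j and v below are placeholder values assumed to be owned by this
   process) of how application code supplies only upper triangular entries to an MPISBAIJ
   matrix; lower triangular entries are either silently dropped (when
   MAT_IGNORE_LOWER_TRIANGULAR is set) or generate an error.

     PetscInt    i = 4, j = 6;
     PetscScalar v = 1.0;
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);     kept: i/bs <= j/bs
     ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);     ignored: lower triangular block
*/
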
131 #undef __FUNCT__
132 #define __FUNCT__ "MatSetValues_MPISBAIJ"
133 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
134 {
135   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
136   MatScalar      value;
137   PetscBool      roworiented = baij->roworiented;
138   PetscErrorCode ierr;
139   PetscInt       i,j,row,col;
140   PetscInt       rstart_orig=mat->rmap->rstart;
141   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
142   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
143 
144   /* Some variables required by the macros */
145   Mat          A     = baij->A;
146   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
147   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
148   MatScalar    *aa   =a->a;
149 
150   Mat         B     = baij->B;
151   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
152   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
153   MatScalar   *ba   =b->a;
154 
155   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
156   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
157   MatScalar *ap,*bap;
158 
159   /* for stash */
160   PetscInt  n_loc, *in_loc = NULL;
161   MatScalar *v_loc = NULL;
162 
163   PetscFunctionBegin;
164   if (v) PetscValidScalarPointer(v,6);
165   if (!baij->donotstash) {
166     if (n > baij->n_loc) {
167       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
168       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
169       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
170       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
171 
172       baij->n_loc = n;
173     }
174     in_loc = baij->in_loc;
175     v_loc  = baij->v_loc;
176   }
177 
178   for (i=0; i<m; i++) {
179     if (im[i] < 0) continue;
180 #if defined(PETSC_USE_DEBUG)
181     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
182 #endif
183     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
184       row = im[i] - rstart_orig;              /* local row index */
185       for (j=0; j<n; j++) {
186         if (im[i]/bs > in[j]/bs) {
187           if (a->ignore_ltriangular) {
188             continue;    /* ignore lower triangular blocks */
189         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
190         }
191         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
192           col  = in[j] - cstart_orig;         /* local col index */
193           brow = row/bs; bcol = col/bs;
194           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
195           if (roworiented) value = v[i*n+j];
196           else             value = v[i+j*m];
197           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
198           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
199         } else if (in[j] < 0) continue;
200 #if defined(PETSC_USE_DEBUG)
201         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
202 #endif
203         else {  /* off-diag entry (B) */
204           if (mat->was_assembled) {
205             if (!baij->colmap) {
206               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
207             }
208 #if defined(PETSC_USE_CTABLE)
209             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
210             col  = col - 1;
211 #else
212             col = baij->colmap[in[j]/bs] - 1;
213 #endif
214             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
215               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
216               col  =  in[j];
217               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
218               B    = baij->B;
219               b    = (Mat_SeqBAIJ*)(B)->data;
220               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
221               ba   = b->a;
222             } else col += in[j]%bs;
223           } else col = in[j];
224           if (roworiented) value = v[i*n+j];
225           else             value = v[i+j*m];
226           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         }
229       }
230     } else {  /* off processor entry */
231       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
232       if (!baij->donotstash) {
233         mat->assembled = PETSC_FALSE;
234         n_loc          = 0;
235         for (j=0; j<n; j++) {
236           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
237           in_loc[n_loc] = in[j];
238           if (roworiented) {
239             v_loc[n_loc] = v[i*n+j];
240           } else {
241             v_loc[n_loc] = v[j*m+i];
242           }
243           n_loc++;
244         }
245         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
246       }
247     }
248   }
249   PetscFunctionReturn(0);
250 }
251 
252 #undef __FUNCT__
253 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
254 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
255 {
256   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
257   const MatScalar *value;
258   MatScalar       *barray     =baij->barray;
259   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
260   PetscErrorCode  ierr;
261   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
262   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
263   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
264 
265   PetscFunctionBegin;
266   if (!barray) {
267     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
268     baij->barray = barray;
269   }
270 
271   if (roworiented) {
272     stepval = (n-1)*bs;
273   } else {
274     stepval = (m-1)*bs;
275   }
276   for (i=0; i<m; i++) {
277     if (im[i] < 0) continue;
278 #if defined(PETSC_USE_DEBUG)
279     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
280 #endif
281     if (im[i] >= rstart && im[i] < rend) {
282       row = im[i] - rstart;
283       for (j=0; j<n; j++) {
284         if (im[i] > in[j]) {
285           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
286           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
287         }
288         /* If n == 1 (row oriented) or m == 1 (column oriented), a copy is not required */
289         if ((roworiented) && (n == 1)) {
290           barray = (MatScalar*) v + i*bs2;
291         } else if ((!roworiented) && (m == 1)) {
292           barray = (MatScalar*) v + j*bs2;
293         } else { /* Here a copy is required */
294           if (roworiented) {
295             value = v + i*(stepval+bs)*bs + j*bs;
296           } else {
297             value = v + j*(stepval+bs)*bs + i*bs;
298           }
299           for (ii=0; ii<bs; ii++,value+=stepval) {
300             for (jj=0; jj<bs; jj++) {
301               *barray++ = *value++;
302             }
303           }
304           barray -=bs2;
305         }
306 
307         if (in[j] >= cstart && in[j] < cend) {
308           col  = in[j] - cstart;
309           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
310         } else if (in[j] < 0) continue;
311 #if defined(PETSC_USE_DEBUG)
312         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
313 #endif
314         else {
315           if (mat->was_assembled) {
316             if (!baij->colmap) {
317               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
318             }
319 
320 #if defined(PETSC_USE_DEBUG)
321 #if defined(PETSC_USE_CTABLE)
322             { PetscInt data;
323               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
324               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
325             }
326 #else
327             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
328 #endif
329 #endif
330 #if defined(PETSC_USE_CTABLE)
331             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
332             col  = (col - 1)/bs;
333 #else
334             col = (baij->colmap[in[j]] - 1)/bs;
335 #endif
336             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
337               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
338               col  = in[j];
339             }
340           } else col = in[j];
341           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
342         }
343       }
344     } else {
345       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
346       if (!baij->donotstash) {
347         if (roworiented) {
348           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
349         } else {
350           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
351         }
352       }
353     }
354   }
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNCT__
359 #define __FUNCT__ "MatGetValues_MPISBAIJ"
360 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
361 {
362   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
365   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
366 
367   PetscFunctionBegin;
368   for (i=0; i<m; i++) {
369     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
370     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
371     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
372       row = idxm[i] - bsrstart;
373       for (j=0; j<n; j++) {
374         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
375         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
376         if (idxn[j] >= bscstart && idxn[j] < bscend) {
377           col  = idxn[j] - bscstart;
378           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
379         } else {
380           if (!baij->colmap) {
381             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
382           }
383 #if defined(PETSC_USE_CTABLE)
384           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
385           data--;
386 #else
387           data = baij->colmap[idxn[j]/bs]-1;
388 #endif
389           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
390           else {
391             col  = data + idxn[j]%bs;
392             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
393           }
394         }
395       }
396     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
397   }
398   PetscFunctionReturn(0);
399 }
400 
401 #undef __FUNCT__
402 #define __FUNCT__ "MatNorm_MPISBAIJ"
403 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
404 {
405   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
406   PetscErrorCode ierr;
407   PetscReal      sum[2],*lnorm2;
408 
409   PetscFunctionBegin;
410   if (baij->size == 1) {
411     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
412   } else {
413     if (type == NORM_FROBENIUS) {
414       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
415       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
416       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
417       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
418       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
419       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
420       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
421       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
422     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
423       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
424       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
425       PetscReal    *rsum,*rsum2,vabs;
426       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
427       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
428       MatScalar    *v;
429 
430       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
431       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
432       /* Amat */
433       v = amat->a; jj = amat->j;
434       for (brow=0; brow<mbs; brow++) {
435         grow = bs*(rstart + brow);
436         nz   = amat->i[brow+1] - amat->i[brow];
437         for (bcol=0; bcol<nz; bcol++) {
438           gcol = bs*(rstart + *jj); jj++;
439           for (col=0; col<bs; col++) {
440             for (row=0; row<bs; row++) {
441               vabs            = PetscAbsScalar(*v); v++;
442               rsum[gcol+col] += vabs;
443               /* non-diagonal block */
444               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
445             }
446           }
447         }
448       }
449       /* Bmat */
450       v = bmat->a; jj = bmat->j;
451       for (brow=0; brow<mbs; brow++) {
452         grow = bs*(rstart + brow);
453         nz = bmat->i[brow+1] - bmat->i[brow];
454         for (bcol=0; bcol<nz; bcol++) {
455           gcol = bs*garray[*jj]; jj++;
456           for (col=0; col<bs; col++) {
457             for (row=0; row<bs; row++) {
458               vabs            = PetscAbsScalar(*v); v++;
459               rsum[gcol+col] += vabs;
460               rsum[grow+row] += vabs;
461             }
462           }
463         }
464       }
465       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
466       *norm = 0.0;
467       for (col=0; col<mat->cmap->N; col++) {
468         if (rsum2[col] > *norm) *norm = rsum2[col];
469       }
470       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
471     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
472   }
473   PetscFunctionReturn(0);
474 }
475 
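/*
   Informational note on the Frobenius branch of MatNorm_MPISBAIJ() above (a sketch of the
   reasoning, not a formal statement): baij->B holds blocks strictly to the right of the
   diagonal block, and in the full symmetric matrix each of these appears again transposed
   below the diagonal, hence the factor of 2 on sum[1]; the diagonal block's contribution is
   returned by MatNorm() on baij->A. This is what

     *norm = PetscSqrtReal(sum[0] + 2*sum[1]);

   computes after the MPI_Allreduce() of the local squared norms.
*/
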
476 #undef __FUNCT__
477 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
478 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
479 {
480   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
481   PetscErrorCode ierr;
482   PetscInt       nstash,reallocs;
483   InsertMode     addv;
484 
485   PetscFunctionBegin;
486   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
487 
488   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
489   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
490   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
491   mat->insertmode = addv; /* in case this processor had no cache */
492 
493   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
494   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
495   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
496   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
497   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
498   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
504 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
505 {
506   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
507   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
508   PetscErrorCode ierr;
509   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
510   PetscInt       *row,*col;
511   PetscBool      other_disassembled;
512   PetscMPIInt    n;
513   PetscBool      r1,r2,r3;
514   MatScalar      *val;
515   InsertMode     addv = mat->insertmode;
516 
517   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
518   PetscFunctionBegin;
519   if (!baij->donotstash &&  !mat->nooffprocentries) {
520     while (1) {
521       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
522       if (!flg) break;
523 
524       for (i=0; i<n;) {
525         /* Now identify the consecutive vals belonging to the same row */
526         for (j=i,rstart=row[j]; j<n; j++) {
527           if (row[j] != rstart) break;
528         }
529         if (j < n) ncols = j-i;
530         else       ncols = n-i;
531         /* Now assemble all these values with a single function call */
532         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
533         i    = j;
534       }
535     }
536     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
537     /* Now process the block-stash. Since the values are stashed column-oriented,
538        temporarily set the roworiented flags to PETSC_FALSE, and after MatSetValues()
539        restore the original flags */
540     r1 = baij->roworiented;
541     r2 = a->roworiented;
542     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
543 
544     baij->roworiented = PETSC_FALSE;
545     a->roworiented    = PETSC_FALSE;
546 
547     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
548     while (1) {
549       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
550       if (!flg) break;
551 
552       for (i=0; i<n;) {
553         /* Now identify the consecutive vals belonging to the same row */
554         for (j=i,rstart=row[j]; j<n; j++) {
555           if (row[j] != rstart) break;
556         }
557         if (j < n) ncols = j-i;
558         else       ncols = n-i;
559         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
560         i    = j;
561       }
562     }
563     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
564 
565     baij->roworiented = r1;
566     a->roworiented    = r2;
567 
568     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
569   }
570 
571   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
572   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
573 
574   /* determine if any processor has disassembled; if so we must
575      also disassemble ourselves, so that we can reassemble. */
576   /*
577      if the nonzero structure of submatrix B cannot change then we know that
578      no processor disassembled, thus we can skip this step
579   */
580   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
581     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
582     if (mat->was_assembled && !other_disassembled) {
583       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
584     }
585   }
586 
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
589   }
590   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
591   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
592 
593   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
594 
595   baij->rowvalues = 0;
596   {
597     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
598     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
599   }
600   PetscFunctionReturn(0);
601 }
602 
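/*
   Illustrative sketch (row, col and v are placeholder values) of the user-level calls that
   drive MatAssemblyBegin_MPISBAIJ()/MatAssemblyEnd_MPISBAIJ() above; any entries stashed for
   other processes are communicated between the two calls.

     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);     may be off-process
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
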
603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
604 #include <petscdraw.h>
605 #undef __FUNCT__
606 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
607 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
608 {
609   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
610   PetscErrorCode    ierr;
611   PetscInt          bs   = mat->rmap->bs;
612   PetscMPIInt       size = baij->size,rank = baij->rank;
613   PetscBool         iascii,isdraw;
614   PetscViewer       sviewer;
615   PetscViewerFormat format;
616 
617   PetscFunctionBegin;
618   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
619   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
620   if (iascii) {
621     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
622     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
623       MatInfo info;
624       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
625       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
630       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
632       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
633       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
634       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
635       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
636       PetscFunctionReturn(0);
637     } else if (format == PETSC_VIEWER_ASCII_INFO) {
638       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
639       PetscFunctionReturn(0);
640     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
641       PetscFunctionReturn(0);
642     }
643   }
644 
645   if (isdraw) {
646     PetscDraw draw;
647     PetscBool isnull;
648     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
649     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
650   }
651 
652   if (size == 1) {
653     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
654     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
655   } else {
656     /* assemble the entire matrix onto the first processor. */
657     Mat          A;
658     Mat_SeqSBAIJ *Aloc;
659     Mat_SeqBAIJ  *Bloc;
660     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
661     MatScalar    *a;
662 
663     /* Should this be the same type as mat? */
664     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
665     if (!rank) {
666       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
667     } else {
668       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
669     }
670     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
671     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
672     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
673     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
674 
675     /* copy over the A part */
676     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
677     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
678     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
679 
680     for (i=0; i<mbs; i++) {
681       rvals[0] = bs*(baij->rstartbs + i);
682       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
683       for (j=ai[i]; j<ai[i+1]; j++) {
684         col = (baij->cstartbs+aj[j])*bs;
685         for (k=0; k<bs; k++) {
686           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
687           col++;
688           a += bs;
689         }
690       }
691     }
692     /* copy over the B part */
693     Bloc = (Mat_SeqBAIJ*)baij->B->data;
694     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
695     for (i=0; i<mbs; i++) {
696 
697       rvals[0] = bs*(baij->rstartbs + i);
698       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
699       for (j=ai[i]; j<ai[i+1]; j++) {
700         col = baij->garray[aj[j]]*bs;
701         for (k=0; k<bs; k++) {
702           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
703           col++;
704           a += bs;
705         }
706       }
707     }
708     ierr = PetscFree(rvals);CHKERRQ(ierr);
709     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
710     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
711     /*
712        Everyone has to make this call to draw the matrix since the graphics waits are
713        synchronized across all processors that share the PetscDraw object
714     */
715     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
716     if (!rank) {
717       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
718       /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
719       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
720       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
721     }
722     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
723     ierr = MatDestroy(&A);CHKERRQ(ierr);
724   }
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatView_MPISBAIJ"
730 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
731 {
732   PetscErrorCode ierr;
733   PetscBool      iascii,isdraw,issocket,isbinary;
734 
735   PetscFunctionBegin;
736   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
737   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
739   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
740   if (iascii || isdraw || issocket || isbinary) {
741     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
742   }
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatDestroy_MPISBAIJ"
748 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
749 {
750   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754 #if defined(PETSC_USE_LOG)
755   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
756 #endif
757   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
758   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
760   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
761 #if defined(PETSC_USE_CTABLE)
762   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
763 #else
764   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
765 #endif
766   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
767   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
768   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
774   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
775   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
776   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
777   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
780   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
781 #if defined(PETSC_USE_REAL_MAT_SINGLE)
782   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
783 #endif
784   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
786   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
787   ierr = PetscFree(mat->data);CHKERRQ(ierr);
788 
789   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
800 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
801 {
802   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
805   PetscScalar    *x,*from;
806 
807   PetscFunctionBegin;
808   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
809   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
810 
811   /* diagonal part */
812   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
813   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
814 
815   /* subdiagonal part */
816   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
817 
818   /* copy x into the vec slvec0 */
819   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
820   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
821 
822   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
823   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
824   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
825 
826   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828   /* superdiagonal part */
829   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatMult_MPISBAIJ"
835 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
836 {
837   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
838   PetscErrorCode ierr;
839   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
840   PetscScalar    *x,*from;
841 
842   PetscFunctionBegin;
843   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
844   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
845 
846   /* diagonal part */
847   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
848   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
849 
850   /* subdiagonal part */
851   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
852 
853   /* copy x into the vec slvec0 */
854   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
855   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856 
857   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
858   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
859   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
860 
861   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863   /* superdiagonal part */
864   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
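/*
   Sketch of the algorithm used by MatMult_MPISBAIJ() above (informational comment only).
   Since only the upper triangular part is stored, the product y = M*x is assembled as

     y = A*x_local + B*x_ghost + (contributions of B^T*x_local scattered to their owners)

   slvec0 holds [x_local ; B^T*x_local] and slvec1 starts as [A*x_local ; 0]; the single
   ADD_VALUES scatter both delivers the B^T*x_local pieces to the owning processes and
   gathers the needed ghost values of x into slvec1b, after which y = B*slvec1b + slvec1a.
*/
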
868 #undef __FUNCT__
869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
871 {
872   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
873   PetscErrorCode ierr;
874   PetscInt       nt;
875 
876   PetscFunctionBegin;
877   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
878   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
879 
880   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
881   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
882 
883   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884   /* do diagonal part */
885   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
886   /* do superdiagonal part */
887   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
888   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
889   /* do subdiagonal part */
890   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
891   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
898 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
901   PetscErrorCode ierr;
902   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
903   PetscScalar    *x,*from,zero=0.0;
904 
905   PetscFunctionBegin;
906   /*
907   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
908   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
909   */
910   /* diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
912   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
913 
914   /* subdiagonal part */
915   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
916 
917   /* copy x into the vec slvec0 */
918   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
921   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
922 
923   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926 
927   /* superdiagonal part */
928   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
929   PetscFunctionReturn(0);
930 }
931 
932 #undef __FUNCT__
933 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
934 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
935 {
936   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
937   PetscErrorCode ierr;
938 
939   PetscFunctionBegin;
940   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941   /* do diagonal part */
942   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
943   /* do superdiagonal part */
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
946 
947   /* do subdiagonal part */
948   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
949   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
950   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 
954 /*
955   This only works correctly for square matrices where the subblock A->A is the
956    diagonal block
957 */
958 #undef __FUNCT__
959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
961 {
962   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
967   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatScale_MPISBAIJ"
973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
974 {
975   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
980   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatGetRow_MPISBAIJ"
986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
987 {
988   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
989   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
990   PetscErrorCode ierr;
991   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
992   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
993   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
994 
995   PetscFunctionBegin;
996   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
997   mat->getrowactive = PETSC_TRUE;
998 
999   if (!mat->rowvalues && (idx || v)) {
1000     /*
1001         allocate enough space to hold information from the longest row.
1002     */
1003     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1004     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1005     PetscInt     max = 1,mbs = mat->mbs,tmp;
1006     for (i=0; i<mbs; i++) {
1007       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1008       if (max < tmp) max = tmp;
1009     }
1010     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1011   }
1012 
1013   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1014   lrow = row - brstart;  /* local row index */
1015 
1016   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1017   if (!v)   {pvA = 0; pvB = 0;}
1018   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1019   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1020   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1021   nztot = nzA + nzB;
1022 
1023   cmap = mat->garray;
1024   if (v  || idx) {
1025     if (nztot) {
1026       /* Merge into increasing column order, assuming A and B are already sorted */
1027       PetscInt imark = -1;
1028       if (v) {
1029         *v = v_p = mat->rowvalues;
1030         for (i=0; i<nzB; i++) {
1031           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1032           else break;
1033         }
1034         imark = i;
1035         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1036         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1037       }
1038       if (idx) {
1039         *idx = idx_p = mat->rowindices;
1040         if (imark > -1) {
1041           for (i=0; i<imark; i++) {
1042             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1043           }
1044         } else {
1045           for (i=0; i<nzB; i++) {
1046             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1047             else break;
1048           }
1049           imark = i;
1050         }
1051         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1052         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1053       }
1054     } else {
1055       if (idx) *idx = 0;
1056       if (v)   *v   = 0;
1057     }
1058   }
1059   *nz  = nztot;
1060   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1061   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1067 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1068 {
1069   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1070 
1071   PetscFunctionBegin;
1072   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1073   baij->getrowactive = PETSC_FALSE;
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1079 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1080 {
1081   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1082   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1083 
1084   PetscFunctionBegin;
1085   aA->getrow_utriangular = PETSC_TRUE;
1086   PetscFunctionReturn(0);
1087 }
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1090 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1091 {
1092   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1093   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1094 
1095   PetscFunctionBegin;
1096   aA->getrow_utriangular = PETSC_FALSE;
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1102 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1103 {
1104   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1109   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1115 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1116 {
1117   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1122   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1128 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1129 {
1130   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1135   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1141 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1142 {
1143   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1144   Mat            A  = a->A,B = a->B;
1145   PetscErrorCode ierr;
1146   PetscReal      isend[5],irecv[5];
1147 
1148   PetscFunctionBegin;
1149   info->block_size = (PetscReal)matin->rmap->bs;
1150 
1151   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1152 
1153   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1154   isend[3] = info->memory;  isend[4] = info->mallocs;
1155 
1156   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1157 
1158   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1159   isend[3] += info->memory;  isend[4] += info->mallocs;
1160   if (flag == MAT_LOCAL) {
1161     info->nz_used      = isend[0];
1162     info->nz_allocated = isend[1];
1163     info->nz_unneeded  = isend[2];
1164     info->memory       = isend[3];
1165     info->mallocs      = isend[4];
1166   } else if (flag == MAT_GLOBAL_MAX) {
1167     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1168 
1169     info->nz_used      = irecv[0];
1170     info->nz_allocated = irecv[1];
1171     info->nz_unneeded  = irecv[2];
1172     info->memory       = irecv[3];
1173     info->mallocs      = irecv[4];
1174   } else if (flag == MAT_GLOBAL_SUM) {
1175     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1176 
1177     info->nz_used      = irecv[0];
1178     info->nz_allocated = irecv[1];
1179     info->nz_unneeded  = irecv[2];
1180     info->memory       = irecv[3];
1181     info->mallocs      = irecv[4];
1182   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1183   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1184   info->fill_ratio_needed = 0;
1185   info->factor_mallocs    = 0;
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1191 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1192 {
1193   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1194   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   switch (op) {
1199   case MAT_NEW_NONZERO_LOCATIONS:
1200   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1201   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1202   case MAT_KEEP_NONZERO_PATTERN:
1203   case MAT_NEW_NONZERO_LOCATION_ERR:
1204     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1205     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1206     break;
1207   case MAT_ROW_ORIENTED:
1208     a->roworiented = flg;
1209 
1210     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1211     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1212     break;
1213   case MAT_NEW_DIAGONALS:
1214     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1215     break;
1216   case MAT_IGNORE_OFF_PROC_ENTRIES:
1217     a->donotstash = flg;
1218     break;
1219   case MAT_USE_HASH_TABLE:
1220     a->ht_flag = flg;
1221     break;
1222   case MAT_HERMITIAN:
1223     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1224     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1225 
1226     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1227     break;
1228   case MAT_SPD:
1229     A->spd_set = PETSC_TRUE;
1230     A->spd     = flg;
1231     if (flg) {
1232       A->symmetric                  = PETSC_TRUE;
1233       A->structurally_symmetric     = PETSC_TRUE;
1234       A->symmetric_set              = PETSC_TRUE;
1235       A->structurally_symmetric_set = PETSC_TRUE;
1236     }
1237     break;
1238   case MAT_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1240     break;
1241   case MAT_STRUCTURALLY_SYMMETRIC:
1242     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1243     break;
1244   case MAT_SYMMETRY_ETERNAL:
1245     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1246     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1247     break;
1248   case MAT_IGNORE_LOWER_TRIANGULAR:
1249     aA->ignore_ltriangular = flg;
1250     break;
1251   case MAT_ERROR_LOWER_TRIANGULAR:
1252     aA->ignore_ltriangular = flg;
1253     break;
1254   case MAT_GETROW_UPPERTRIANGULAR:
1255     aA->getrow_utriangular = flg;
1256     break;
1257   default:
1258     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1265 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1266 {
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1271     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1278 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1279 {
1280   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1281   Mat            a     = baij->A, b=baij->B;
1282   PetscErrorCode ierr;
1283   PetscInt       nv,m,n;
1284   PetscBool      flg;
1285 
1286   PetscFunctionBegin;
1287   if (ll != rr) {
1288     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1289     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1290   }
1291   if (!ll) PetscFunctionReturn(0);
1292 
1293   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1294   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1295 
1296   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1297   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1298 
1299   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1300 
1301   /* left diagonalscale the off-diagonal part */
1302   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1303 
1304   /* scale the diagonal part */
1305   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1306 
1307   /* right diagonalscale the off-diagonal part */
1308   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1309   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1315 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1316 {
1317   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1318   PetscErrorCode ierr;
1319 
1320   PetscFunctionBegin;
1321   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatEqual_MPISBAIJ"
1329 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1330 {
1331   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1332   Mat            a,b,c,d;
1333   PetscBool      flg;
1334   PetscErrorCode ierr;
1335 
1336   PetscFunctionBegin;
1337   a = matA->A; b = matA->B;
1338   c = matB->A; d = matB->B;
1339 
1340   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1341   if (flg) {
1342     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1343   }
1344   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatCopy_MPISBAIJ"
1350 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1351 {
1352   PetscErrorCode ierr;
1353   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1354   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1355 
1356   PetscFunctionBegin;
1357   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1358   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1359     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1360     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1361     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1362   } else {
1363     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1364     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1371 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1372 {
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
1376   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1382 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1383 {
1384   PetscErrorCode ierr;
1385   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1386   PetscBLASInt   bnz,one=1;
1387   Mat_SeqSBAIJ   *xa,*ya;
1388   Mat_SeqBAIJ    *xb,*yb;
1389 
1390   PetscFunctionBegin;
1391   if (str == SAME_NONZERO_PATTERN) {
1392     PetscScalar alpha = a;
1393     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1394     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1395     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1396     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1397     xb   = (Mat_SeqBAIJ*)xx->B->data;
1398     yb   = (Mat_SeqBAIJ*)yy->B->data;
1399     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1400     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1401   } else {
1402     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1403     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1404     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1411 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1412 {
1413   PetscErrorCode ierr;
1414   PetscInt       i;
1415   PetscBool      flg;
1416 
1417   PetscFunctionBegin;
1418   for (i=0; i<n; i++) {
1419     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1420     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1421   }
1422   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 
1427 /* -------------------------------------------------------------------*/
1428 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1429                                        MatGetRow_MPISBAIJ,
1430                                        MatRestoreRow_MPISBAIJ,
1431                                        MatMult_MPISBAIJ,
1432                                /*  4*/ MatMultAdd_MPISBAIJ,
1433                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1434                                        MatMultAdd_MPISBAIJ,
1435                                        0,
1436                                        0,
1437                                        0,
1438                                /* 10*/ 0,
1439                                        0,
1440                                        0,
1441                                        MatSOR_MPISBAIJ,
1442                                        MatTranspose_MPISBAIJ,
1443                                /* 15*/ MatGetInfo_MPISBAIJ,
1444                                        MatEqual_MPISBAIJ,
1445                                        MatGetDiagonal_MPISBAIJ,
1446                                        MatDiagonalScale_MPISBAIJ,
1447                                        MatNorm_MPISBAIJ,
1448                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1449                                        MatAssemblyEnd_MPISBAIJ,
1450                                        MatSetOption_MPISBAIJ,
1451                                        MatZeroEntries_MPISBAIJ,
1452                                /* 24*/ 0,
1453                                        0,
1454                                        0,
1455                                        0,
1456                                        0,
1457                                /* 29*/ MatSetUp_MPISBAIJ,
1458                                        0,
1459                                        0,
1460                                        0,
1461                                        0,
1462                                /* 34*/ MatDuplicate_MPISBAIJ,
1463                                        0,
1464                                        0,
1465                                        0,
1466                                        0,
1467                                /* 39*/ MatAXPY_MPISBAIJ,
1468                                        MatGetSubMatrices_MPISBAIJ,
1469                                        MatIncreaseOverlap_MPISBAIJ,
1470                                        MatGetValues_MPISBAIJ,
1471                                        MatCopy_MPISBAIJ,
1472                                /* 44*/ 0,
1473                                        MatScale_MPISBAIJ,
1474                                        0,
1475                                        0,
1476                                        0,
1477                                /* 49*/ 0,
1478                                        0,
1479                                        0,
1480                                        0,
1481                                        0,
1482                                /* 54*/ 0,
1483                                        0,
1484                                        MatSetUnfactored_MPISBAIJ,
1485                                        0,
1486                                        MatSetValuesBlocked_MPISBAIJ,
1487                                /* 59*/ 0,
1488                                        0,
1489                                        0,
1490                                        0,
1491                                        0,
1492                                /* 64*/ 0,
1493                                        0,
1494                                        0,
1495                                        0,
1496                                        0,
1497                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1498                                        0,
1499                                        0,
1500                                        0,
1501                                        0,
1502                                /* 74*/ 0,
1503                                        0,
1504                                        0,
1505                                        0,
1506                                        0,
1507                                /* 79*/ 0,
1508                                        0,
1509                                        0,
1510                                        0,
1511                                        MatLoad_MPISBAIJ,
1512                                /* 84*/ 0,
1513                                        0,
1514                                        0,
1515                                        0,
1516                                        0,
1517                                /* 89*/ 0,
1518                                        0,
1519                                        0,
1520                                        0,
1521                                        0,
1522                                /* 94*/ 0,
1523                                        0,
1524                                        0,
1525                                        0,
1526                                        0,
1527                                /* 99*/ 0,
1528                                        0,
1529                                        0,
1530                                        0,
1531                                        0,
1532                                /*104*/ 0,
1533                                        MatRealPart_MPISBAIJ,
1534                                        MatImaginaryPart_MPISBAIJ,
1535                                        MatGetRowUpperTriangular_MPISBAIJ,
1536                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1537                                /*109*/ 0,
1538                                        0,
1539                                        0,
1540                                        0,
1541                                        0,
1542                                /*114*/ 0,
1543                                        0,
1544                                        0,
1545                                        0,
1546                                        0,
1547                                /*119*/ 0,
1548                                        0,
1549                                        0,
1550                                        0,
1551                                        0,
1552                                /*124*/ 0,
1553                                        0,
1554                                        0,
1555                                        0,
1556                                        0,
1557                                /*129*/ 0,
1558                                        0,
1559                                        0,
1560                                        0,
1561                                        0,
1562                                /*134*/ 0,
1563                                        0,
1564                                        0,
1565                                        0,
1566                                        0,
1567                                /*139*/ 0,
1568                                        0,
1569                                        0
1570 };
1571 
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1575 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1576 {
1577   PetscFunctionBegin;
1578   *a = ((Mat_MPISBAIJ*)A->data)->A;
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1584 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1585 {
1586   Mat_MPISBAIJ   *b;
1587   PetscErrorCode ierr;
1588   PetscInt       i,mbs,Mbs;
1589 
1590   PetscFunctionBegin;
1591   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1592   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1593   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1594   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1595 
1596   b   = (Mat_MPISBAIJ*)B->data;
1597   mbs = B->rmap->n/bs;
1598   Mbs = B->rmap->N/bs;
1599   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1600 
1601   B->rmap->bs = bs;
1602   b->bs2      = bs*bs;
1603   b->mbs      = mbs;
1604   b->nbs      = mbs;
1605   b->Mbs      = Mbs;
1606   b->Nbs      = Mbs;
1607 
1608   for (i=0; i<=b->size; i++) {
1609     b->rangebs[i] = B->rmap->range[i]/bs;
1610   }
1611   b->rstartbs = B->rmap->rstart/bs;
1612   b->rendbs   = B->rmap->rend/bs;
1613 
1614   b->cstartbs = B->cmap->rstart/bs;
1615   b->cendbs   = B->cmap->rend/bs;
1616 
1617   if (!B->preallocated) {
1618     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1619     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1620     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1621     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1622     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1623     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1624     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1625     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1626     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1627   }
1628 
1629   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1630   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1631 
1632   B->preallocated = PETSC_TRUE;
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 #undef __FUNCT__
1637 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1638 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1639 {
1640   PetscInt       m,rstart,cstart,cend;
1641   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1642   const PetscInt *JJ    =0;
1643   PetscScalar    *values=0;
1644   PetscErrorCode ierr;
1645 
1646   PetscFunctionBegin;
1647   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1648   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1649   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1650   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1651   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1652   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1653   m      = B->rmap->n/bs;
1654   rstart = B->rmap->rstart/bs;
1655   cstart = B->cmap->rstart/bs;
1656   cend   = B->cmap->rend/bs;
1657 
1658   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1659   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1660   for (i=0; i<m; i++) {
1661     nz = ii[i+1] - ii[i];
1662     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1663     nz_max = PetscMax(nz_max,nz);
1664     JJ     = jj + ii[i];
1665     for (j=0; j<nz; j++) {
1666       if (*JJ >= cstart) break;
1667       JJ++;
1668     }
1669     d = 0;
1670     for (; j<nz; j++) {
1671       if (*JJ++ >= cend) break;
1672       d++;
1673     }
1674     d_nnz[i] = d;
1675     o_nnz[i] = nz - d;
1676   }
1677   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1678   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1679 
1680   values = (PetscScalar*)V;
1681   if (!values) {
1682     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1683     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1684   }
1685   for (i=0; i<m; i++) {
1686     PetscInt          row    = i + rstart;
1687     PetscInt          ncols  = ii[i+1] - ii[i];
1688     const PetscInt    *icols = jj + ii[i];
1689     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1690     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1691   }
1692 
1693   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1694   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1696   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 #if defined(PETSC_HAVE_MUMPS)
1701 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1702 #endif
1703 #if defined(PETSC_HAVE_PASTIX)
1704 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1705 #endif
1706 
1707 /*MC
1708    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1709    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1710    the matrix is stored.
1711 
1712   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1713   can call MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE).
1714 
1715    Options Database Keys:
1716 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1717 
1718   Level: beginner
1719 
1720 .seealso: MatCreateMPISBAIJ
1721 M*/
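
/*
   The following is a hedged usage sketch, not part of the manual page above.  It assumes a
   square system of global size M with block size bs; d_nz, o_nz, i, j and v are placeholder
   preallocation counts, block indices and a block of values.  Only blocks in the upper
   triangle (block column >= block row) need to be set.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
     ierr = MatSetValuesBlocked(A,1,&i,1,&j,v,INSERT_VALUES);CHKERRQ(ierr);   (set only blocks with j >= i)
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   For complex matrices that should be treated as Hermitian rather than symmetric, also call
     ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
*/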
1722 
1723 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1724 
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatCreate_MPISBAIJ"
1727 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1728 {
1729   Mat_MPISBAIJ   *b;
1730   PetscErrorCode ierr;
1731   PetscBool      flg;
1732 
1733   PetscFunctionBegin;
1734   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1735   B->data = (void*)b;
1736   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1737 
1738   B->ops->destroy = MatDestroy_MPISBAIJ;
1739   B->ops->view    = MatView_MPISBAIJ;
1740   B->assembled    = PETSC_FALSE;
1741   B->insertmode   = NOT_SET_VALUES;
1742 
1743   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1744   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1745 
1746   /* build local table of row and column ownerships */
1747   ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr);
1748 
1749   /* build cache for off-processor entries generated during assembly */
1750   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1751 
1752   b->donotstash  = PETSC_FALSE;
1753   b->colmap      = NULL;
1754   b->garray      = NULL;
1755   b->roworiented = PETSC_TRUE;
1756 
1757   /* stuff used in block assembly */
1758   b->barray = 0;
1759 
1760   /* stuff used for matrix vector multiply */
1761   b->lvec    = 0;
1762   b->Mvctx   = 0;
1763   b->slvec0  = 0;
1764   b->slvec0b = 0;
1765   b->slvec1  = 0;
1766   b->slvec1a = 0;
1767   b->slvec1b = 0;
1768   b->sMvctx  = 0;
1769 
1770   /* stuff for MatGetRow() */
1771   b->rowindices   = 0;
1772   b->rowvalues    = 0;
1773   b->getrowactive = PETSC_FALSE;
1774 
1775   /* hash table stuff */
1776   b->ht           = 0;
1777   b->hd           = 0;
1778   b->ht_size      = 0;
1779   b->ht_flag      = PETSC_FALSE;
1780   b->ht_fact      = 0;
1781   b->ht_total_ct  = 0;
1782   b->ht_insert_ct = 0;
1783 
1784   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1785   b->ijonly = PETSC_FALSE;
1786 
1787   b->in_loc = 0;
1788   b->v_loc  = 0;
1789   b->n_loc  = 0;
1790   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1791   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1792   if (flg) {
1793     PetscReal fact = 1.39;
1794     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1795     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1796     if (fact <= 1.0) fact = 1.39;
1797     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1798     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1799   }
1800   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1801 
1802 #if defined(PETSC_HAVE_PASTIX)
1803   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1804 #endif
1805 #if defined(PETSC_HAVE_MUMPS)
1806   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1807 #endif
1808   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1809   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1810   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1811   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1814 
1815   B->symmetric                  = PETSC_TRUE;
1816   B->structurally_symmetric     = PETSC_TRUE;
1817   B->symmetric_set              = PETSC_TRUE;
1818   B->structurally_symmetric_set = PETSC_TRUE;
1819 
1820   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 /*MC
1825    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1826 
1827    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1828    and MATMPISBAIJ otherwise.
1829 
1830    Options Database Keys:
1831 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1832 
1833   Level: beginner
1834 
1835 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1836 M*/
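
/*
   A hedged sketch of the usual creation sequence for MATSBAIJ (comm, m, M, bs and the
   preallocation counts d_nz/o_nz are placeholders).  Because the concrete type (MATSEQSBAIJ
   or MATMPISBAIJ) is only resolved at runtime, both preallocation routines are called; the
   one that does not match the final type is a no-op.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqSBAIJSetPreallocation(A,bs,d_nz,NULL);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/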
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1840 /*@C
1841    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1842    the user should preallocate the matrix storage by setting the parameters
1843    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1844    performance can be increased by more than a factor of 50.
1845 
1846    Collective on Mat
1847 
1848    Input Parameters:
1849 +  A - the matrix
1850 .  bs   - size of block
1851 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1852            submatrix  (same for all local rows)
1853 .  d_nnz - array containing the number of block nonzeros in the various block rows
1854            in the upper triangular and diagonal part of the diagonal portion of the local
1855            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1856            for the diagonal entry and set a value even if it is zero.
1857 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1858            submatrix (same for all local rows).
1859 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1860            off-diagonal portion of the local submatrix that is right of the diagonal
1861            (possibly different for each block row) or NULL.
1862 
1863 
1864    Options Database Keys:
1865 .   -mat_no_unroll - uses code that does not unroll the loops in the
1866                      block calculations (much slower)
1867 .   -mat_block_size - size of the blocks to use
1868 
1869    Notes:
1870 
1871    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1872    then it must be used on all processors that share the object for that argument.
1873 
1874    If the *_nnz parameter is given then the *_nz parameter is ignored
1875 
1876    Storage Information:
1877    For a square global matrix we define each processor's diagonal portion
1878    to be its local rows and the corresponding columns (a square submatrix);
1879    each processor's off-diagonal portion encompasses the remainder of the
1880    local matrix (a rectangular submatrix).
1881 
1882    The user can specify preallocated storage for the diagonal part of
1883    the local submatrix with either d_nz or d_nnz (not both).  Set
1884    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1885    memory allocation.  Likewise, specify preallocated storage for the
1886    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1887 
1888    You can call MatGetInfo() to get information on how effective the preallocation was;
1889    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1890    You can also run with the option -info and look for messages with the string
1891    malloc in them to see if additional memory allocation was needed.
1892 
1893    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1894    the figure below we depict these three local rows and all columns (0-11).
1895 
1896 .vb
1897            0 1 2 3 4 5 6 7 8 9 10 11
1898           --------------------------
1899    row 3  |. . . d d d o o o o  o  o
1900    row 4  |. . . d d d o o o o  o  o
1901    row 5  |. . . d d d o o o o  o  o
1902           --------------------------
1903 .ve
1904 
1905    Thus, any entries in the d locations are stored in the d (diagonal)
1906    submatrix, and any entries in the o locations are stored in the
1907    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1908    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1909 
1910    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1911    plus the diagonal part of the d matrix,
1912    and o_nz should indicate the number of block nonzeros per row in the o matrix
1913 
1914    In general, for PDE problems in which most nonzeros are near the diagonal,
1915    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1916    or you will get TERRIBLE performance; see the users' manual chapter on
1917    matrices.
1918 
1919    Level: intermediate
1920 
1921 .keywords: matrix, block, aij, compressed row, sparse, parallel
1922 
1923 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1924 @*/
1925 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1926 {
1927   PetscErrorCode ierr;
1928 
1929   PetscFunctionBegin;
1930   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1931   PetscValidType(B,1);
1932   PetscValidLogicalCollectiveInt(B,bs,2);
1933   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1934   PetscFunctionReturn(0);
1935 }
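
/*
   A worked instance of the figure in the manual page above (interpreting it with bs = 1,
   every depicted d/o location nonzero, and rows 3-5 owned locally); A stands for the matrix
   being preallocated, and a real application would of course compute these counts from its
   own stencil or mesh:

     PetscInt d_nnz[3] = {3,2,1};   row 3: cols 3,4,5   row 4: cols 4,5   row 5: col 5
     PetscInt o_nnz[3] = {6,6,6};   cols 6..11 for each of the three rows
     ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   Only the upper triangular part of the diagonal portion is counted in d_nnz, while o_nnz
   counts the entries to the right of the diagonal portion.
*/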
1936 
1937 #undef __FUNCT__
1938 #define __FUNCT__ "MatCreateSBAIJ"
1939 /*@C
1940    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1941    (block compressed row).  For good matrix assembly performance
1942    the user should preallocate the matrix storage by setting the parameters
1943    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1944    performance can be increased by more than a factor of 50.
1945 
1946    Collective on MPI_Comm
1947 
1948    Input Parameters:
1949 +  comm - MPI communicator
1950 .  bs   - size of block
1951 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1952            This value should be the same as the local size used in creating the
1953            y vector for the matrix-vector product y = Ax.
1954 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1955            This value should be the same as the local size used in creating the
1956            x vector for the matrix-vector product y = Ax.
1957 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1958 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1959 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1960            submatrix  (same for all local rows)
1961 .  d_nnz - array containing the number of block nonzeros in the various block rows
1962            in the upper triangular portion of the diagonal portion of the local
1963            submatrix (possibly different for each block row) or NULL.
1964            If you plan to factor the matrix you must leave room for the diagonal entry and
1965            set its value even if it is zero.
1966 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1967            submatrix (same for all local rows).
1968 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1969            off-diagonal portion of the local submatrix (possibly different for
1970            each block row) or NULL.
1971 
1972    Output Parameter:
1973 .  A - the matrix
1974 
1975    Options Database Keys:
1976 +   -mat_no_unroll - uses code that does not unroll the loops in the
1977                      block calculations (much slower)
1978 .   -mat_block_size - size of the blocks to use
1979 -   -mat_mpi - use the parallel matrix data structures even on one processor
1980                (defaults to using SeqBAIJ format on one processor)
1981 
1982    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1983    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1984    [MatXXXXSetPreallocation() is, for example, MatSeqSBAIJSetPreallocation()]
1985 
1986    Notes:
1987    The number of rows and columns must be divisible by blocksize.
1988    This matrix type does not support complex Hermitian operations.
1989 
1990    The user MUST specify either the local or global matrix dimensions
1991    (possibly both).
1992 
1993    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1994    then it must be used on all processors that share the object for that argument.
1995 
1996    If the *_nnz parameter is given then the *_nz parameter is ignored
1997 
1998    Storage Information:
1999    For a square global matrix we define each processor's diagonal portion
2000    to be its local rows and the corresponding columns (a square submatrix);
2001    each processor's off-diagonal portion encompasses the remainder of the
2002    local matrix (a rectangular submatrix).
2003 
2004    The user can specify preallocated storage for the diagonal part of
2005    the local submatrix with either d_nz or d_nnz (not both).  Set
2006    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2007    memory allocation.  Likewise, specify preallocated storage for the
2008    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2009 
2010    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2011    the figure below we depict these three local rows and all columns (0-11).
2012 
2013 .vb
2014            0 1 2 3 4 5 6 7 8 9 10 11
2015           --------------------------
2016    row 3  |. . . d d d o o o o  o  o
2017    row 4  |. . . d d d o o o o  o  o
2018    row 5  |. . . d d d o o o o  o  o
2019           --------------------------
2020 .ve
2021 
2022    Thus, any entries in the d locations are stored in the d (diagonal)
2023    submatrix, and any entries in the o locations are stored in the
2024    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2025    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2026 
2027    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2028    plus the diagonal part of the d matrix,
2029    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2030    In general, for PDE problems in which most nonzeros are near the diagonal,
2031    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2032    or you will get TERRIBLE performance; see the users' manual chapter on
2033    matrices.
2034 
2035    Level: intermediate
2036 
2037 .keywords: matrix, block, aij, compressed row, sparse, parallel
2038 
2039 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2040 @*/
2041 
2042 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2043 {
2044   PetscErrorCode ierr;
2045   PetscMPIInt    size;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2049   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2050   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2051   if (size > 1) {
2052     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2053     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2054   } else {
2055     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2056     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2057   }
2058   PetscFunctionReturn(0);
2059 }
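
/*
   A hedged sketch of a direct call to the routine above (the block size bs, global size M
   and preallocation counts d_nz/o_nz are placeholders):

     Mat A;
     ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,M,
                           d_nz,NULL,o_nz,NULL,&A);CHKERRQ(ierr);
     ...  set values in the upper triangle, assemble, and use A  ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   As noted in the manual page, the MatCreate()/MatSetType()/MatXXXXSetPreallocation()
   sequence is the recommended alternative to calling this routine directly.
*/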
2060 
2061 
2062 #undef __FUNCT__
2063 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2064 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2065 {
2066   Mat            mat;
2067   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2068   PetscErrorCode ierr;
2069   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2070   PetscScalar    *array;
2071 
2072   PetscFunctionBegin;
2073   *newmat = 0;
2074 
2075   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2076   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2077   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2078   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2079   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2080   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2081 
2082   mat->factortype   = matin->factortype;
2083   mat->preallocated = PETSC_TRUE;
2084   mat->assembled    = PETSC_TRUE;
2085   mat->insertmode   = NOT_SET_VALUES;
2086 
2087   a      = (Mat_MPISBAIJ*)mat->data;
2088   a->bs2 = oldmat->bs2;
2089   a->mbs = oldmat->mbs;
2090   a->nbs = oldmat->nbs;
2091   a->Mbs = oldmat->Mbs;
2092   a->Nbs = oldmat->Nbs;
2093 
2094 
2095   a->size         = oldmat->size;
2096   a->rank         = oldmat->rank;
2097   a->donotstash   = oldmat->donotstash;
2098   a->roworiented  = oldmat->roworiented;
2099   a->rowindices   = 0;
2100   a->rowvalues    = 0;
2101   a->getrowactive = PETSC_FALSE;
2102   a->barray       = 0;
2103   a->rstartbs     = oldmat->rstartbs;
2104   a->rendbs       = oldmat->rendbs;
2105   a->cstartbs     = oldmat->cstartbs;
2106   a->cendbs       = oldmat->cendbs;
2107 
2108   /* hash table stuff */
2109   a->ht           = 0;
2110   a->hd           = 0;
2111   a->ht_size      = 0;
2112   a->ht_flag      = oldmat->ht_flag;
2113   a->ht_fact      = oldmat->ht_fact;
2114   a->ht_total_ct  = 0;
2115   a->ht_insert_ct = 0;
2116 
2117   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2118   if (oldmat->colmap) {
2119 #if defined(PETSC_USE_CTABLE)
2120     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2121 #else
2122     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
2123     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2124     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2125 #endif
2126   } else a->colmap = 0;
2127 
2128   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2129     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2130     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2131     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2132   } else a->garray = 0;
2133 
2134   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2135   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2136   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2137   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2139 
2140   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2141   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2142   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2143   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2144 
2145   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2146   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2147   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2148   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2149   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2150   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2151   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2152   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2153   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2154   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2155   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2156   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2158 
2159   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2160   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2161   a->sMvctx = oldmat->sMvctx;
2162   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2163 
2164   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2165   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2166   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2167   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2168   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2169   *newmat = mat;
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 #undef __FUNCT__
2174 #define __FUNCT__ "MatLoad_MPISBAIJ"
2175 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2176 {
2177   PetscErrorCode ierr;
2178   PetscInt       i,nz,j,rstart,rend;
2179   PetscScalar    *vals,*buf;
2180   MPI_Comm       comm;
2181   MPI_Status     status;
2182   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2183   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2184   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2185   PetscInt       bs       =1,Mbs,mbs,extra_rows;
2186   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2187   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2188   int            fd;
2189 
2190   PetscFunctionBegin;
2191   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2192   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2193   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2194   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2195 
2196   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2198   if (!rank) {
2199     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2200     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2201     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2202     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2203   }
2204 
2205   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2206 
2207   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2208   M    = header[1];
2209   N    = header[2];
2210 
2211   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2212   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2213   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2214 
2215   /* If global sizes are set, check if they are consistent with that given in the file */
2216   if (sizesset) {
2217     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2218   }
2219   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2220   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2221 
2222   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2223 
2224   /*
2225      This code adds extra rows to make sure the number of rows is
2226      divisible by the blocksize
2227   */
2228   Mbs        = M/bs;
2229   extra_rows = bs - M + bs*(Mbs);
2230   if (extra_rows == bs) extra_rows = 0;
2231   else                  Mbs++;
2232   if (extra_rows && !rank) {
2233     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2234   }
2235 
2236   /* determine ownership of all rows */
2237   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2238     mbs = Mbs/size + ((Mbs % size) > rank);
2239     m   = mbs*bs;
2240   } else { /* User Set */
2241     m   = newmat->rmap->n;
2242     mbs = m/bs;
2243   }
2244   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2245   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2246   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2247   rowners[0] = 0;
2248   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2249   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2250   rstart = rowners[rank];
2251   rend   = rowners[rank+1];
2252 
2253   /* distribute row lengths to all processors */
2254   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2255   if (!rank) {
2256     ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
2257     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2258     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2259     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2260     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2261     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2262     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2263   } else {
2264     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2265   }
2266 
2267   if (!rank) {   /* procs[0] */
2268     /* calculate the number of nonzeros on each processor */
2269     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2270     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2271     for (i=0; i<size; i++) {
2272       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2273         procsnz[i] += rowlengths[j];
2274       }
2275     }
2276     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2277 
2278     /* determine max buffer needed and allocate it */
2279     maxnz = 0;
2280     for (i=0; i<size; i++) {
2281       maxnz = PetscMax(maxnz,procsnz[i]);
2282     }
2283     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2284 
2285     /* read in my part of the matrix column indices  */
2286     nz     = procsnz[0];
2287     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2288     mycols = ibuf;
2289     if (size == 1) nz -= extra_rows;
2290     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2291     if (size == 1) {
2292       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2293     }
2294 
2295     /* read in the column indices for all other processes (except the last) and ship them off */
2296     for (i=1; i<size-1; i++) {
2297       nz   = procsnz[i];
2298       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2299       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2300     }
2301     /* read in the stuff for the last proc */
2302     if (size != 1) {
2303       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2304       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2305       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2306       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2307     }
2308     ierr = PetscFree(cols);CHKERRQ(ierr);
2309   } else {  /* procs[i], i>0 */
2310     /* determine buffer space needed for message */
2311     nz = 0;
2312     for (i=0; i<m; i++) nz += locrowlens[i];
2313     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2314     mycols = ibuf;
2315     /* receive message of column indices*/
2316     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2317     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2318     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2319   }
2320 
2321   /* loop over local rows, determining number of off diagonal entries */
2322   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2323   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2324   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2325   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2326   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2327   rowcount = 0;
2328   nzcount  = 0;
2329   for (i=0; i<mbs; i++) {
2330     dcount  = 0;
2331     odcount = 0;
2332     for (j=0; j<bs; j++) {
2333       kmax = locrowlens[rowcount];
2334       for (k=0; k<kmax; k++) {
2335         tmp = mycols[nzcount++]/bs; /* block col. index */
2336         if (!mask[tmp]) {
2337           mask[tmp] = 1;
2338           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2339           else masked1[dcount++] = tmp; /* entry in diag portion */
2340         }
2341       }
2342       rowcount++;
2343     }
2344 
2345     dlens[i]  = dcount;  /* d_nzz[i] */
2346     odlens[i] = odcount; /* o_nzz[i] */
2347 
2348     /* zero out the mask elements we set */
2349     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2350     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2351   }
2352   if (!sizesset) {
2353     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2354   }
2355   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2356   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2357 
2358   if (!rank) {
2359     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2360     /* read in my part of the matrix numerical values  */
2361     nz     = procsnz[0];
2362     vals   = buf;
2363     mycols = ibuf;
2364     if (size == 1) nz -= extra_rows;
2365     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2366     if (size == 1) {
2367       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2368     }
2369 
2370     /* insert into matrix */
2371     jj = rstart*bs;
2372     for (i=0; i<m; i++) {
2373       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2374       mycols += locrowlens[i];
2375       vals   += locrowlens[i];
2376       jj++;
2377     }
2378 
2379     /* read in other processors (except the last one) and ship out */
2380     for (i=1; i<size-1; i++) {
2381       nz   = procsnz[i];
2382       vals = buf;
2383       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2384       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2385     }
2386     /* the last proc */
2387     if (size != 1) {
2388       nz   = procsnz[i] - extra_rows;
2389       vals = buf;
2390       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2391       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2392       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2393     }
2394     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2395 
2396   } else {
2397     /* receive numeric values */
2398     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2399 
2400     /* receive message of values*/
2401     vals   = buf;
2402     mycols = ibuf;
2403     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2404     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2405     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2406 
2407     /* insert into matrix */
2408     jj = rstart*bs;
2409     for (i=0; i<m; i++) {
2410       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2411       mycols += locrowlens[i];
2412       vals   += locrowlens[i];
2413       jj++;
2414     }
2415   }
2416 
2417   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2418   ierr = PetscFree(buf);CHKERRQ(ierr);
2419   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2420   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2421   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2422   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2423   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2424   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2430 /*XXXXX@
2431    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2432 
2433    Input Parameters:
2434 .  mat  - the matrix
2435 .  fact - factor
2436 
2437    Not Collective on Mat, each process can have a different hash factor
2438 
2439    Level: advanced
2440 
2441   Notes:
2442    This can also be set by the command line option: -mat_use_hash_table fact
2443 
2444 .keywords: matrix, hashtable, factor, HT
2445 
2446 .seealso: MatSetOption()
2447 @XXXXX*/
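
/*
   A hedged illustration of enabling the hash-table-based assembly described above (A stands
   for an MPISBAIJ matrix and the factor 1.6 is just an example value):

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);

   or, as the note above says, via the command line option -mat_use_hash_table <fact>.
*/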
2448 
2449 
2450 #undef __FUNCT__
2451 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2452 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2453 {
2454   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2455   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2456   PetscReal      atmp;
2457   PetscReal      *work,*svalues,*rvalues;
2458   PetscErrorCode ierr;
2459   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2460   PetscMPIInt    rank,size;
2461   PetscInt       *rowners_bs,dest,count,source;
2462   PetscScalar    *va;
2463   MatScalar      *ba;
2464   MPI_Status     stat;
2465 
2466   PetscFunctionBegin;
2467   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2468   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2469   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2470 
2471   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2472   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2473 
2474   bs  = A->rmap->bs;
2475   mbs = a->mbs;
2476   Mbs = a->Mbs;
2477   ba  = b->a;
2478   bi  = b->i;
2479   bj  = b->j;
2480 
2481   /* find ownerships */
2482   rowners_bs = A->rmap->range;
2483 
2484   /* each proc creates an array to be distributed */
2485   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2486   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2487 
2488   /* row_max for B */
2489   if (rank != size-1) {
2490     for (i=0; i<mbs; i++) {
2491       ncols = bi[1] - bi[0]; bi++;
2492       brow  = bs*i;
2493       for (j=0; j<ncols; j++) {
2494         bcol = bs*(*bj);
2495         for (kcol=0; kcol<bs; kcol++) {
2496           col  = bcol + kcol;                /* local col index */
2497           col += rowners_bs[rank+1];      /* global col index */
2498           for (krow=0; krow<bs; krow++) {
2499             atmp = PetscAbsScalar(*ba); ba++;
2500             row  = brow + krow;   /* local row index */
2501             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2502             if (work[col] < atmp) work[col] = atmp;
2503           }
2504         }
2505         bj++;
2506       }
2507     }
2508 
2509     /* send values to its owners */
2510     for (dest=rank+1; dest<size; dest++) {
2511       svalues = work + rowners_bs[dest];
2512       count   = rowners_bs[dest+1]-rowners_bs[dest];
2513       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2514     }
2515   }
2516 
2517   /* receive values */
2518   if (rank) {
2519     rvalues = work;
2520     count   = rowners_bs[rank+1]-rowners_bs[rank];
2521     for (source=0; source<rank; source++) {
2522       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2523       /* process values */
2524       for (i=0; i<count; i++) {
2525         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2526       }
2527     }
2528   }
2529 
2530   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2531   ierr = PetscFree(work);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 #undef __FUNCT__
2536 #define __FUNCT__ "MatSOR_MPISBAIJ"
2537 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2538 {
2539   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2540   PetscErrorCode    ierr;
2541   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2542   PetscScalar       *x,*ptr,*from;
2543   Vec               bb1;
2544   const PetscScalar *b;
2545 
2546   PetscFunctionBegin;
2547   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2548   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2549 
2550   if (flag == SOR_APPLY_UPPER) {
2551     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2552     PetscFunctionReturn(0);
2553   }
2554 
2555   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2556     if (flag & SOR_ZERO_INITIAL_GUESS) {
2557       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2558       its--;
2559     }
2560 
2561     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2562     while (its--) {
2563 
2564       /* lower triangular part: slvec0b = - B^T*xx */
2565       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2566 
2567       /* copy xx into slvec0a */
2568       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2569       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2570       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2571       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2572 
2573       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2574 
2575       /* copy bb into slvec1a */
2576       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2577       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2578       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2579       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2580 
2581       /* set slvec1b = 0 */
2582       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2583 
2584       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2585       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2586       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2587       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2588 
2589       /* upper triangular part: bb1 = slvec1a + B*slvec1b  (slvec1b holds -x ghost values) */
2590       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2591 
2592       /* local diagonal sweep */
2593       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2594     }
2595     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2596   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2598   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2599     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2600   } else if (flag & SOR_EISENSTAT) {
2601     Vec               xx1;
2602     PetscBool         hasop;
2603     const PetscScalar *diag;
2604     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2605     PetscInt          i,n;
2606 
2607     if (!mat->xx1) {
2608       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2609       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2610     }
2611     xx1 = mat->xx1;
2612     bb1 = mat->bb1;
2613 
2614     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2615 
2616     if (!mat->diag) {
2617       /* note: this cached diagonal becomes stale if the same matrix is given new nonzero values */
2618       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2619       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2620     }
2621     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2622 
2623     if (hasop) {
2624       ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr); /* store D*xx in slvec1a so the VecAYPX() below scales the vector used later */
2625       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2626     } else {
2627       /*
2628           The following two lines are replaced by the explicit loops below, which may be a bit faster with a good compiler
2629       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2630       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2631       */
2632       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2633       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2634       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2635       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2636       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2637       if (omega == 1.0) {
2638         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2639         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2640       } else {
2641         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2642         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2643       }
2644       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2645       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2646       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2647       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2648     }
2649 
2650     /* multiply off-diagonal portion of matrix */
2651     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2652     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2653     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2654     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2655     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2656     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2657     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2658     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2659     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2660     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2661 
2662     /* local sweep */
2663     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2664     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2665   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2666   PetscFunctionReturn(0);
2667 }
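
#if 0
/* Minimal sketch (illustrative only; the helper name and arguments are assumptions) of how
   this kernel is reached through the public MatSOR() interface. For MPISBAIJ with bs = 1,
   the branches above support SOR_APPLY_UPPER, the (local) symmetric sweep, single local
   forward/backward sweeps with a zero initial guess, and SOR_EISENSTAT. */
static PetscErrorCode ExampleLocalSSOR(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* one outer iteration, one local iteration, omega = 1, no diagonal shift */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif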
2668 
2669 #undef __FUNCT__
2670 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2671 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2672 {
2673   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2674   PetscErrorCode ierr;
2675   Vec            lvec1,bb1;
2676 
2677   PetscFunctionBegin;
2678   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2679   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2680 
2681   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2682     if (flag & SOR_ZERO_INITIAL_GUESS) {
2683       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2684       its--;
2685     }
2686 
2687     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2688     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2689     while (its--) {
2690       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2691 
2692       /* lower triangular part: bb1 = bb - B^T*xx */
2693       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2694       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2695 
2696       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2697       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2698       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2699 
2700       /* upper triangular part: bb1 = bb1 - B*x */
2701       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2702       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2703 
2704       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2705 
2706       /* diagonal sweep */
2707       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2708     }
2709     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2710     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2711   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2712   PetscFunctionReturn(0);
2713 }
2714 
2715 #undef __FUNCT__
2716 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2717 /*@
2718      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
2719          in standard CSR format.
2720 
2721    Collective on MPI_Comm
2722 
2723    Input Parameters:
2724 +  comm - MPI communicator
2725 .  bs - the block size, only a block size of 1 is supported
2726 .  m - number of local rows (Cannot be PETSC_DECIDE)
2727 .  n - this value should be the same as the local size used in creating the
2728        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2729        it calculated if N is given); for square matrices n is almost always m
2730 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2731 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2732 .   i - row indices; i has m+1 entries and i[0] must be 0
2733 .   j - column indices (0 based, sorted within each row)
2734 -   a - matrix values
2735 
2736    Output Parameter:
2737 .   mat - the matrix
2738 
2739    Level: intermediate
2740 
2741    Notes:
2742        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2743      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2744      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2745 
2746        The i and j indices are 0 based, and the entries of i are offsets into the local j array (standard CSR row pointers).
2747 
2748 .keywords: matrix, aij, compressed row, sparse, parallel
2749 
2750 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2751           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2752 @*/
2753 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2754 {
2755   PetscErrorCode ierr;
2756 
2757 
2758   PetscFunctionBegin;
2759   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2760   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
2761   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2762   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2763   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2764   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2765   PetscFunctionReturn(0);
2766 }
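
#if 0
/* Minimal usage sketch (illustrative only; the helper name and the 3x3 data are assumptions).
   Each process passes its local rows in 0-based CSR form; here a single process supplies the
   upper triangle of a 3x3 symmetric tridiagonal matrix with bs = 1. */
static PetscErrorCode ExampleCreateSBAIJFromCSR(MPI_Comm comm,Mat *mat)
{
  PetscErrorCode ierr;
  PetscInt       i[] = {0,2,4,5};               /* row pointers: rows have 2, 2, 1 entries */
  PetscInt       j[] = {0,1,1,2,2};             /* column indices, sorted within each row */
  PetscScalar    a[] = {4.0,-1.0,4.0,-1.0,4.0}; /* values matching j[] */

  PetscFunctionBegin;
  ierr = MatCreateMPISBAIJWithArrays(comm,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif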
2767 
2768 
2769 #undef __FUNCT__
2770 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2771 /*@C
2772    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
2773    using a compressed sparse row (CSR) description of the local rows.
2774 
2775    Collective on MPI_Comm
2776 
2777    Input Parameters:
2778 +  A - the matrix
2779 .  bs - the block size
2780 .  i - the indices into j for the start of each local row (starts with zero)
2781 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2782 -  v - optional values in the matrix
2783 
2784    Level: developer
2785 
2786 .keywords: matrix, aij, compressed row, sparse, parallel
2787 
2788 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2789 @*/
2790 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2791 {
2792   PetscErrorCode ierr;
2793 
2794   PetscFunctionBegin;
2795   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2796   PetscFunctionReturn(0);
2797 }
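
#if 0
/* Minimal sketch (illustrative only; the helper name is an assumption) of calling this
   routine directly instead of MatCreateMPISBAIJWithArrays(): create the matrix, set its
   sizes and type, then hand over the local CSR arrays, mirroring the sequence used above. */
static PetscErrorCode ExamplePreallocateSBAIJCSR(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,const PetscInt i[],const PetscInt j[],const PetscScalar v[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif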
2798 
2799 
2800