xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision dced61a5cfeeeda68dfa4ee3e53b34d3cfb8da9f)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
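/*
   Both routines above simply forward to the sequential diagonal (A) and
   off-diagonal (B) parts.  A minimal caller-side sketch of the intended
   save/restore cycle (assumed usage; error checking omitted):

       MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
       MatStoreValues(mat);      ...  snapshot the current numerical values
       MatZeroEntries(mat);
       ...  refill values, then MatAssemblyBegin()/MatAssemblyEnd()  ...
       MatRetrieveValues(mat);   ...  restore the snapshot
*/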
35 
36 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
37   { \
38  \
39     brow = row/bs;  \
40     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
41     rmax = aimax[brow]; nrow = ailen[brow]; \
42     bcol = col/bs; \
43     ridx = row % bs; cidx = col % bs; \
44     low  = 0; high = nrow; \
45     while (high-low > 3) { \
46       t = (low+high)/2; \
47       if (rp[t] > bcol) high = t; \
48       else              low  = t; \
49     } \
50     for (_i=low; _i<high; _i++) { \
51       if (rp[_i] > bcol) break; \
52       if (rp[_i] == bcol) { \
53         bap = ap + bs2*_i + bs*cidx + ridx; \
54         if (addv == ADD_VALUES) *bap += value;  \
55         else                    *bap  = value;  \
56         goto a_noinsert; \
57       } \
58     } \
59     if (a->nonew == 1) goto a_noinsert; \
60     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
61     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
62     N = nrow++ - 1;  \
63     /* shift up all the later entries in this row */ \
64     for (ii=N; ii>=_i; ii--) { \
65       rp[ii+1] = rp[ii]; \
66       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
67     } \
68     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
69     rp[_i]                      = bcol;  \
70     ap[bs2*_i + bs*cidx + ridx] = value;  \
71     A->nonzerostate++;\
72 a_noinsert:; \
73     ailen[brow] = nrow; \
74   }
75 
76 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
77   { \
78     brow = row/bs;  \
79     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
80     rmax = bimax[brow]; nrow = bilen[brow]; \
81     bcol = col/bs; \
82     ridx = row % bs; cidx = col % bs; \
83     low  = 0; high = nrow; \
84     while (high-low > 3) { \
85       t = (low+high)/2; \
86       if (rp[t] > bcol) high = t; \
87       else              low  = t; \
88     } \
89     for (_i=low; _i<high; _i++) { \
90       if (rp[_i] > bcol) break; \
91       if (rp[_i] == bcol) { \
92         bap = ap + bs2*_i + bs*cidx + ridx; \
93         if (addv == ADD_VALUES) *bap += value;  \
94         else                    *bap  = value;  \
95         goto b_noinsert; \
96       } \
97     } \
98     if (b->nonew == 1) goto b_noinsert; \
99     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
100     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
101     N = nrow++ - 1;  \
102     /* shift up all the later entries in this row */ \
103     for (ii=N; ii>=_i; ii--) { \
104       rp[ii+1] = rp[ii]; \
105       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
106     } \
107     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
108     rp[_i]                      = bcol;  \
109     ap[bs2*_i + bs*cidx + ridx] = value;  \
110     B->nonzerostate++;\
111 b_noinsert:; \
112     bilen[brow] = nrow; \
113   }
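/*
   Both macros above use the same lookup strategy: a binary search narrows the
   candidate block columns in rp[] to a window of at most three entries, a
   linear scan finishes the job, and a miss triggers reallocation plus an
   insertion shift.  Values are stored column-major within each bs x bs block,
   so the scalar entry (row,col) lands at offset

       bs2*_i + bs*(col%bs) + (row%bs)

   where _i is the position of block column col/bs in this block row's list.
*/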
114 
115 /* Only add/insert blocks a(i,j) with i<=j.
116    Any a(i,j) with i>j supplied by the user is ignored.
117 */
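/*
   A caller assembling full (both-triangle) element contributions into an SBAIJ
   matrix must either filter the i>j entries itself or ask the matrix to drop
   them.  A minimal caller-side sketch (assumed usage; error checking omitted):

       MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
       MatSetValues(mat,nrows,rows,ncols,cols,vals,ADD_VALUES);
       MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);

   The same behavior is available on the command line via
   -mat_ignore_lower_triangular.
*/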
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPISBAIJ"
120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
123   MatScalar      value;
124   PetscBool      roworiented = baij->roworiented;
125   PetscErrorCode ierr;
126   PetscInt       i,j,row,col;
127   PetscInt       rstart_orig=mat->rmap->rstart;
128   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
129   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
130 
131   /* Some variables required by the macros */
132   Mat          A     = baij->A;
133   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
134   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
135   MatScalar    *aa   =a->a;
136 
137   Mat         B     = baij->B;
138   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
139   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
140   MatScalar   *ba   =b->a;
141 
142   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
143   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
144   MatScalar *ap,*bap;
145 
146   /* for stash */
147   PetscInt  n_loc, *in_loc = NULL;
148   MatScalar *v_loc = NULL;
149 
150   PetscFunctionBegin;
151   if (!baij->donotstash) {
152     if (n > baij->n_loc) {
153       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
154       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
155       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
156       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
157 
158       baij->n_loc = n;
159     }
160     in_loc = baij->in_loc;
161     v_loc  = baij->v_loc;
162   }
163 
164   for (i=0; i<m; i++) {
165     if (im[i] < 0) continue;
166 #if defined(PETSC_USE_DEBUG)
167     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
168 #endif
169     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
170       row = im[i] - rstart_orig;              /* local row index */
171       for (j=0; j<n; j++) {
172         if (im[i]/bs > in[j]/bs) {
173           if (a->ignore_ltriangular) {
174             continue;    /* ignore lower triangular blocks */
175           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
176         }
177         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
178           col  = in[j] - cstart_orig;         /* local col index */
179           brow = row/bs; bcol = col/bs;
180           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
181           if (roworiented) value = v[i*n+j];
182           else             value = v[i+j*m];
183           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
184           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
185         } else if (in[j] < 0) continue;
186 #if defined(PETSC_USE_DEBUG)
187         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
188 #endif
189         else {  /* off-diag entry (B) */
190           if (mat->was_assembled) {
191             if (!baij->colmap) {
192               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
193             }
194 #if defined(PETSC_USE_CTABLE)
195             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
196             col  = col - 1;
197 #else
198             col = baij->colmap[in[j]/bs] - 1;
199 #endif
200             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
201               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
202               col  = in[j];
203               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
204               B    = baij->B;
205               b    = (Mat_SeqBAIJ*)(B)->data;
206               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
207               ba   = b->a;
208             } else col += in[j]%bs;
209           } else col = in[j];
210           if (roworiented) value = v[i*n+j];
211           else             value = v[i+j*m];
212           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
213           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
214         }
215       }
216     } else {  /* off processor entry */
217       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
218       if (!baij->donotstash) {
219         mat->assembled = PETSC_FALSE;
220         n_loc          = 0;
221         for (j=0; j<n; j++) {
222           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
223           in_loc[n_loc] = in[j];
224           if (roworiented) {
225             v_loc[n_loc] = v[i*n+j];
226           } else {
227             v_loc[n_loc] = v[j*m+i];
228           }
229           n_loc++;
230         }
231         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
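/*
   Off-process rows take the stash path above: lower-triangular blocks are
   filtered out locally (so they are never communicated), the surviving
   column/value pairs are packed into in_loc/v_loc, and the row is queued in
   mat->stash until MatAssemblyBegin()/MatAssemblyEnd() route it to its owner,
   where it re-enters this routine with purely local indices.
*/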
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
241 {
242   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
243   PetscErrorCode    ierr;
244   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
245   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
246   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
247   PetscBool         roworiented=a->roworiented;
248   const PetscScalar *value     = v;
249   MatScalar         *ap,*aa = a->a,*bap;
250 
251   PetscFunctionBegin;
252   if (col < row) {
253     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
254     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
255   }
256   rp   = aj + ai[row];
257   ap   = aa + bs2*ai[row];
258   rmax = imax[row];
259   nrow = ailen[row];
260   value = v;
261   low   = 0;
262   high  = nrow;
263 
264   while (high-low > 7) {
265     t = (low+high)/2;
266     if (rp[t] > col) high = t;
267     else             low  = t;
268   }
269   for (i=low; i<high; i++) {
270     if (rp[i] > col) break;
271     if (rp[i] == col) {
272       bap = ap +  bs2*i;
273       if (roworiented) {
274         if (is == ADD_VALUES) {
275           for (ii=0; ii<bs; ii++) {
276             for (jj=ii; jj<bs2; jj+=bs) {
277               bap[jj] += *value++;
278             }
279           }
280         } else {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=ii; jj<bs2; jj+=bs) {
283               bap[jj] = *value++;
284             }
285           }
286         }
287       } else {
288         if (is == ADD_VALUES) {
289           for (ii=0; ii<bs; ii++) {
290             for (jj=0; jj<bs; jj++) {
291               *bap++ += *value++;
292             }
293           }
294         } else {
295           for (ii=0; ii<bs; ii++) {
296             for (jj=0; jj<bs; jj++) {
297               *bap++  = *value++;
298             }
299           }
300         }
301       }
302       goto noinsert2;
303     }
304   }
305   if (nonew == 1) goto noinsert2;
306   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block index (%D, %D) in the matrix", orow, ocol);
307   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
308   N = nrow++ - 1; high++;
309   /* shift up all the later entries in this row */
310   for (ii=N; ii>=i; ii--) {
311     rp[ii+1] = rp[ii];
312     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
313   }
314   if (N >= i) {
315     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
316   }
317   rp[i] = col;
318   bap   = ap +  bs2*i;
319   if (roworiented) {
320     for (ii=0; ii<bs; ii++) {
321       for (jj=ii; jj<bs2; jj+=bs) {
322         bap[jj] = *value++;
323       }
324     }
325   } else {
326     for (ii=0; ii<bs; ii++) {
327       for (jj=0; jj<bs; jj++) {
328         *bap++ = *value++;
329       }
330     }
331   }
332   noinsert2:;
333   ailen[row] = nrow;
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
339 /*
340    This routine is exactly duplicated in mpibaij.c
341 */
342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
343 {
344   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
345   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
346   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
347   PetscErrorCode    ierr;
348   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
349   PetscBool         roworiented=a->roworiented;
350   const PetscScalar *value     = v;
351   MatScalar         *ap,*aa = a->a,*bap;
352 
353   PetscFunctionBegin;
354   rp   = aj + ai[row];
355   ap   = aa + bs2*ai[row];
356   rmax = imax[row];
357   nrow = ailen[row];
358   low  = 0;
359   high = nrow;
360   value = v;
361   while (high-low > 7) {
362     t = (low+high)/2;
363     if (rp[t] > col) high = t;
364     else             low  = t;
365   }
366   for (i=low; i<high; i++) {
367     if (rp[i] > col) break;
368     if (rp[i] == col) {
369       bap = ap +  bs2*i;
370       if (roworiented) {
371         if (is == ADD_VALUES) {
372           for (ii=0; ii<bs; ii++) {
373             for (jj=ii; jj<bs2; jj+=bs) {
374               bap[jj] += *value++;
375             }
376           }
377         } else {
378           for (ii=0; ii<bs; ii++) {
379             for (jj=ii; jj<bs2; jj+=bs) {
380               bap[jj] = *value++;
381             }
382           }
383         }
384       } else {
385         if (is == ADD_VALUES) {
386           for (ii=0; ii<bs; ii++,value+=bs) {
387             for (jj=0; jj<bs; jj++) {
388               bap[jj] += value[jj];
389             }
390             bap += bs;
391           }
392         } else {
393           for (ii=0; ii<bs; ii++,value+=bs) {
394             for (jj=0; jj<bs; jj++) {
395               bap[jj]  = value[jj];
396             }
397             bap += bs;
398           }
399         }
400       }
401       goto noinsert2;
402     }
403   }
404   if (nonew == 1) goto noinsert2;
405   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block index (%D, %D) in the matrix", orow, ocol);
406   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
407   N = nrow++ - 1; high++;
408   /* shift up all the later entries in this row */
409   for (ii=N; ii>=i; ii--) {
410     rp[ii+1] = rp[ii];
411     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
412   }
413   if (N >= i) {
414     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
415   }
416   rp[i] = col;
417   bap   = ap +  bs2*i;
418   if (roworiented) {
419     for (ii=0; ii<bs; ii++) {
420       for (jj=ii; jj<bs2; jj+=bs) {
421         bap[jj] = *value++;
422       }
423     }
424   } else {
425     for (ii=0; ii<bs; ii++) {
426       for (jj=0; jj<bs; jj++) {
427         *bap++ = *value++;
428       }
429     }
430   }
431   noinsert2:;
432   ailen[row] = nrow;
433   PetscFunctionReturn(0);
434 }
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
438 /*
439     This routine could be optimized by removing the need for the block copy below and passing stride information
440   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
441 */
442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
443 {
444   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
445   const MatScalar *value;
446   MatScalar       *barray     =baij->barray;
447   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
448   PetscErrorCode  ierr;
449   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
450   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
451   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
452 
453   PetscFunctionBegin;
454   if (!barray) {
455     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
456     baij->barray = barray;
457   }
458 
459   if (roworiented) {
460     stepval = (n-1)*bs;
461   } else {
462     stepval = (m-1)*bs;
463   }
464   for (i=0; i<m; i++) {
465     if (im[i] < 0) continue;
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
468 #endif
469     if (im[i] >= rstart && im[i] < rend) {
470       row = im[i] - rstart;
471       for (j=0; j<n; j++) {
472         if (im[i] > in[j]) {
473           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
474           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
475         }
476         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, the block in v[] can be used in place; no copy is required */
477         if ((roworiented) && (n == 1)) {
478           barray = (MatScalar*) v + i*bs2;
479         } else if ((!roworiented) && (m == 1)) {
480           barray = (MatScalar*) v + j*bs2;
481         } else { /* Here a copy is required */
482           if (roworiented) {
483             value = v + i*(stepval+bs)*bs + j*bs;
484           } else {
485             value = v + j*(stepval+bs)*bs + i*bs;
486           }
487           for (ii=0; ii<bs; ii++,value+=stepval) {
488             for (jj=0; jj<bs; jj++) {
489               *barray++ = *value++;
490             }
491           }
492           barray -=bs2;
493         }
494 
495         if (in[j] >= cstart && in[j] < cend) {
496           col  = in[j] - cstart;
497           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
498         } else if (in[j] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
501 #endif
502         else {
503           if (mat->was_assembled) {
504             if (!baij->colmap) {
505               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
506             }
507 
508 #if defined(PETSC_USE_DEBUG)
509 #if defined(PETSC_USE_CTABLE)
510             { PetscInt data;
511               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
512               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
513             }
514 #else
515             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
516 #endif
517 #endif
518 #if defined(PETSC_USE_CTABLE)
519             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520             col  = (col - 1)/bs;
521 #else
522             col = (baij->colmap[in[j]] - 1)/bs;
523 #endif
524             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
526               col  = in[j];
527             }
528           } else col = in[j];
529           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
530         }
531       }
532     } else {
533       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
534       if (!baij->donotstash) {
535         if (roworiented) {
536           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
537         } else {
538           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
539         }
540       }
541     }
542   }
543   PetscFunctionReturn(0);
544 }
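/*
   The stepval logic above treats v[] as an (m*bs) x (n*bs) logical array.  A
   worked example (assumed input) with bs=2, m=1, n=2, roworiented: v holds a
   2 x 4 row-major array, stepval = (n-1)*bs = 2, block j starts at v + j*bs,
   and consecutive rows within a block are stepval+bs = 4 apart, so block 1 is

       v[2] v[3]
       v[6] v[7]

   which the copy loop gathers contiguously into the bs2-sized scratch buffer
   barray before handing it to the inlined single-block routines.
*/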
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
551   PetscErrorCode ierr;
552   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
553   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
554 
555   PetscFunctionBegin;
556   for (i=0; i<m; i++) {
557     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
558     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
559     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
560       row = idxm[i] - bsrstart;
561       for (j=0; j<n; j++) {
562         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
563         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
564         if (idxn[j] >= bscstart && idxn[j] < bscend) {
565           col  = idxn[j] - bscstart;
566           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567         } else {
568           if (!baij->colmap) {
569             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
570           }
571 #if defined(PETSC_USE_CTABLE)
572           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
573           data--;
574 #else
575           data = baij->colmap[idxn[j]/bs]-1;
576 #endif
577           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
578           else {
579             col  = data + idxn[j]%bs;
580             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581           }
582         }
583       }
584     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatNorm_MPISBAIJ"
591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
592 {
593   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscReal      sum[2],*lnorm2;
596 
597   PetscFunctionBegin;
598   if (baij->size == 1) {
599     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
600   } else {
601     if (type == NORM_FROBENIUS) {
602       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
603       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
605       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
606       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
607       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
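      /* each block of B appears twice in the full symmetric matrix, once as
         stored and once transposed, hence the factor of 2 on sum[1] below */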
608       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
609       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
610     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
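      /* for a symmetric matrix the 1-norm equals the infinity norm, so both
         reduce to the same max column sum; each stored off-diagonal entry is
         credited to both its row sum and its column sum below */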
611       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
612       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
613       PetscReal    *rsum,*rsum2,vabs;
614       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
615       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
616       MatScalar    *v;
617 
618       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
619       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
620       /* Amat */
621       v = amat->a; jj = amat->j;
622       for (brow=0; brow<mbs; brow++) {
623         grow = bs*(rstart + brow);
624         nz   = amat->i[brow+1] - amat->i[brow];
625         for (bcol=0; bcol<nz; bcol++) {
626           gcol = bs*(rstart + *jj); jj++;
627           for (col=0; col<bs; col++) {
628             for (row=0; row<bs; row++) {
629               vabs            = PetscAbsScalar(*v); v++;
630               rsum[gcol+col] += vabs;
631               /* non-diagonal block */
632               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
633             }
634           }
635         }
636       }
637       /* Bmat */
638       v = bmat->a; jj = bmat->j;
639       for (brow=0; brow<mbs; brow++) {
640         grow = bs*(rstart + brow);
641         nz = bmat->i[brow+1] - bmat->i[brow];
642         for (bcol=0; bcol<nz; bcol++) {
643           gcol = bs*garray[*jj]; jj++;
644           for (col=0; col<bs; col++) {
645             for (row=0; row<bs; row++) {
646               vabs            = PetscAbsScalar(*v); v++;
647               rsum[gcol+col] += vabs;
648               rsum[grow+row] += vabs;
649             }
650           }
651         }
652       }
653       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
654       *norm = 0.0;
655       for (col=0; col<mat->cmap->N; col++) {
656         if (rsum2[col] > *norm) *norm = rsum2[col];
657       }
658       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
659     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
660   }
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNCT__
665 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
666 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscInt       nstash,reallocs;
671   InsertMode     addv;
672 
673   PetscFunctionBegin;
674   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
675 
676   /* make sure all ranks use the same InsertMode: since INSERT_VALUES==1 and ADD_VALUES==2, a bitwise OR across ranks equals (ADD_VALUES|INSERT_VALUES) exactly when some ranks inserted and others added */
677   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
678   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
679   mat->insertmode = addv; /* in case this processor had no cache */
680 
681   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
682   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
683   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
684   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
685   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
686   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
692 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
693 {
694   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
695   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
696   PetscErrorCode ierr;
697   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
698   PetscInt       *row,*col;
699   PetscBool      other_disassembled;
700   PetscMPIInt    n;
701   PetscBool      r1,r2,r3;
702   MatScalar      *val;
703   InsertMode     addv = mat->insertmode;
704 
705   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
706   PetscFunctionBegin;
707   if (!baij->donotstash &&  !mat->nooffprocentries) {
708     while (1) {
709       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
710       if (!flg) break;
711 
712       for (i=0; i<n;) {
713         /* Now identify the consecutive vals belonging to the same row */
714         for (j=i,rstart=row[j]; j<n; j++) {
715           if (row[j] != rstart) break;
716         }
717         if (j < n) ncols = j-i;
718         else       ncols = n-i;
719         /* Now assemble all these values with a single function call */
720         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
721         i    = j;
722       }
723     }
724     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
725     /* Now process the block-stash. Since the values are stashed column-oriented,
726        clear the roworiented flags, and restore the original flags after the
727        MatSetValuesBlocked_MPISBAIJ() calls complete */
728     r1 = baij->roworiented;
729     r2 = a->roworiented;
730     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
731 
732     baij->roworiented = PETSC_FALSE;
733     a->roworiented    = PETSC_FALSE;
734 
735     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
736     while (1) {
737       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
738       if (!flg) break;
739 
740       for (i=0; i<n;) {
741         /* Now identify the consecutive vals belonging to the same row */
742         for (j=i,rstart=row[j]; j<n; j++) {
743           if (row[j] != rstart) break;
744         }
745         if (j < n) ncols = j-i;
746         else       ncols = n-i;
747         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
748         i    = j;
749       }
750     }
751     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
752 
753     baij->roworiented = r1;
754     a->roworiented    = r2;
755 
756     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
757   }
758 
759   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
760   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
761 
762   /* determine if any processor has disassembled; if so we must
763      also disassemble ourselves, in order that we may reassemble. */
764   /*
765      if the nonzero structure of submatrix B cannot change then we know that
766      no processor disassembled, so we can skip this step
767   */
768   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
769     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
770     if (mat->was_assembled && !other_disassembled) {
771       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
772     }
773   }
774 
775   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
776     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
777   }
778   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
779   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
780 
781   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
782 
783   baij->rowvalues = 0;
784 
785   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
786   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
787     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
788     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
789   }
790   PetscFunctionReturn(0);
791 }
792 
793 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
794 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
795 #include <petscdraw.h>
796 #undef __FUNCT__
797 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
798 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
799 {
800   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
801   PetscErrorCode    ierr;
802   PetscInt          bs   = mat->rmap->bs;
803   PetscMPIInt       rank = baij->rank;
804   PetscBool         iascii,isdraw;
805   PetscViewer       sviewer;
806   PetscViewerFormat format;
807 
808   PetscFunctionBegin;
809   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
810   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
811   if (iascii) {
812     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
813     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
814       MatInfo info;
815       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
816       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
817       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
818       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
819       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
820       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
821       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
822       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
823       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
824       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
825       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
826       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
827       PetscFunctionReturn(0);
828     } else if (format == PETSC_VIEWER_ASCII_INFO) {
829       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
830       PetscFunctionReturn(0);
831     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
832       PetscFunctionReturn(0);
833     }
834   }
835 
836   if (isdraw) {
837     PetscDraw draw;
838     PetscBool isnull;
839     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
840     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
841   }
842 
843   {
844     /* assemble the entire matrix onto first processor. */
845     Mat          A;
846     Mat_SeqSBAIJ *Aloc;
847     Mat_SeqBAIJ  *Bloc;
848     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
849     MatScalar    *a;
850     const char   *matname;
851 
852     /* Should this be the same type as mat? */
853     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
854     if (!rank) {
855       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
856     } else {
857       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
858     }
859     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
860     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
861     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
862     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
863 
864     /* copy over the A part */
865     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
866     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
867     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
868 
869     for (i=0; i<mbs; i++) {
870       rvals[0] = bs*(baij->rstartbs + i);
871       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
872       for (j=ai[i]; j<ai[i+1]; j++) {
873         col = (baij->cstartbs+aj[j])*bs;
874         for (k=0; k<bs; k++) {
875           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
876           col++;
877           a += bs;
878         }
879       }
880     }
881     /* copy over the B part */
882     Bloc = (Mat_SeqBAIJ*)baij->B->data;
883     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
884     for (i=0; i<mbs; i++) {
885 
886       rvals[0] = bs*(baij->rstartbs + i);
887       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
888       for (j=ai[i]; j<ai[i+1]; j++) {
889         col = baij->garray[aj[j]]*bs;
890         for (k=0; k<bs; k++) {
891           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
892           col++;
893           a += bs;
894         }
895       }
896     }
897     ierr = PetscFree(rvals);CHKERRQ(ierr);
898     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
899     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
900     /*
901        Everyone has to call to draw the matrix since the graphics waits are
902        synchronized across all processors that share the PetscDraw object
903     */
904     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
905     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
906     if (!rank) {
907       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
908       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
909     }
910     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
911     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
912     ierr = MatDestroy(&A);CHKERRQ(ierr);
913   }
914   PetscFunctionReturn(0);
915 }
916 
917 #undef __FUNCT__
918 #define __FUNCT__ "MatView_MPISBAIJ"
919 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
920 {
921   PetscErrorCode ierr;
922   PetscBool      iascii,isdraw,issocket,isbinary;
923 
924   PetscFunctionBegin;
925   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
926   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
927   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
928   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
929   if (iascii || isdraw || issocket || isbinary) {
930     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 #undef __FUNCT__
936 #define __FUNCT__ "MatDestroy_MPISBAIJ"
937 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
938 {
939   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
940   PetscErrorCode ierr;
941 
942   PetscFunctionBegin;
943 #if defined(PETSC_USE_LOG)
944   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
945 #endif
946   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
947   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
948   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
949   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
950 #if defined(PETSC_USE_CTABLE)
951   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
952 #else
953   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
954 #endif
955   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
956   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
957   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
958   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
959   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
960   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
961   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
962   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
963   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
964   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
965   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
966   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
967   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
968   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
969   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
970 #if defined(PETSC_USE_REAL_MAT_SINGLE)
971   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
972 #endif
973   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
974   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
975   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
976   ierr = PetscFree(mat->data);CHKERRQ(ierr);
977 
978   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
979   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
980   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
981   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
982   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
983   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
984 #if defined(PETSC_HAVE_ELEMENTAL)
985   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
986 #endif
987   PetscFunctionReturn(0);
988 }
989 
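/*
   The MatMult kernels below use the single-scatter ("slvec") scheme:

       slvec0 = [ x ; B^T x ]      packed locally,
       slvec1 = [ A x ; x_ghost ]  after one ADD_VALUES forward scatter of
                                   slvec0 through sMvctx,

   so the remote contributions of B^T x (B^H x in the Hermitian variant) are
   accumulated directly into the owning rows while the ghost values of x
   arrive in slvec1b; the final yy = B*slvec1b + slvec1a then completes
   y = (A + B + B^T) x with a single communication phase, instead of the
   forward plus reverse scatters used by the _2comm variants further below.
*/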
990 #undef __FUNCT__
991 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
992 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
993 {
994   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
995   PetscErrorCode    ierr;
996   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
997   PetscScalar       *from;
998   const PetscScalar *x;
999 
1000   PetscFunctionBegin;
1001   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1002   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1003 
1004   /* diagonal part */
1005   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1006   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1007 
1008   /* subdiagonal part */
1009   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1010 
1011   /* copy x into the vec slvec0 */
1012   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1013   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1014 
1015   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1016   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1017   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1018 
1019   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1020   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021   /* superdiagonal part */
1022   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 #undef __FUNCT__
1027 #define __FUNCT__ "MatMult_MPISBAIJ"
1028 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1029 {
1030   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1031   PetscErrorCode    ierr;
1032   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1033   PetscScalar       *from;
1034   const PetscScalar *x;
1035 
1036   PetscFunctionBegin;
1037   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1038   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1039 
1040   /* diagonal part */
1041   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1042   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1043 
1044   /* subdiagonal part */
1045   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1046 
1047   /* copy x into the vec slvec0 */
1048   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1049   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1050 
1051   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1052   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1053   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1054 
1055   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1056   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1057   /* superdiagonal part */
1058   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 #undef __FUNCT__
1063 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1064 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1065 {
1066   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1067   PetscErrorCode ierr;
1068   PetscInt       nt;
1069 
1070   PetscFunctionBegin;
1071   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1072   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1073 
1074   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1075   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1076 
1077   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1078   /* do diagonal part */
1079   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1080   /* do superdiagonal part */
1081   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1082   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1083   /* do subdiagonal part */
1084   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1085   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1086   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1092 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1093 {
1094   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1095   PetscErrorCode    ierr;
1096   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1097   PetscScalar       *from,zero=0.0;
1098   const PetscScalar *x;
1099 
1100   PetscFunctionBegin;
1101   /*
1102   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1103   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1104   */
1105   /* diagonal part */
1106   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1107   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1108 
1109   /* subdiagonal part */
1110   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1111 
1112   /* copy x into the vec slvec0 */
1113   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1114   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1115   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1116   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1117 
1118   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1120   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1121 
1122   /* superdiagonal part */
1123   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1129 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1130 {
1131   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136   /* do diagonal part */
1137   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1138   /* do superdiagonal part */
1139   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1141 
1142   /* do subdiagonal part */
1143   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1144   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 /*
1150   This only works correctly for square matrices where the subblock A->A is the
1151    diagonal block
1152 */
1153 #undef __FUNCT__
1154 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1155 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1156 {
1157   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1158   PetscErrorCode ierr;
1159 
1160   PetscFunctionBegin;
1161   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1162   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #undef __FUNCT__
1167 #define __FUNCT__ "MatScale_MPISBAIJ"
1168 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1169 {
1170   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1175   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1181 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1182 {
1183   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1184   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1185   PetscErrorCode ierr;
1186   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1187   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1188   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1189 
1190   PetscFunctionBegin;
1191   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1192   mat->getrowactive = PETSC_TRUE;
1193 
1194   if (!mat->rowvalues && (idx || v)) {
1195     /*
1196         allocate enough space to hold information from the longest row.
1197     */
1198     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1199     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1200     PetscInt     max = 1,mbs = mat->mbs,tmp;
1201     for (i=0; i<mbs; i++) {
1202       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1203       if (max < tmp) max = tmp;
1204     }
1205     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1206   }
1207 
1208   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1209   lrow = row - brstart;  /* local row index */
1210 
1211   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1212   if (!v)   {pvA = 0; pvB = 0;}
1213   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1214   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1215   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1216   nztot = nzA + nzB;
1217 
1218   cmap = mat->garray;
1219   if (v  || idx) {
1220     if (nztot) {
1221       /* Merge in increasing global column order: B entries whose global block column lies below cstart, then all A entries, then the remaining B entries; imark marks the split in B (A and B rows are assumed already sorted) */
1222       PetscInt imark = -1;
1223       if (v) {
1224         *v = v_p = mat->rowvalues;
1225         for (i=0; i<nzB; i++) {
1226           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1227           else break;
1228         }
1229         imark = i;
1230         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1231         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1232       }
1233       if (idx) {
1234         *idx = idx_p = mat->rowindices;
1235         if (imark > -1) {
1236           for (i=0; i<imark; i++) {
1237             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1238           }
1239         } else {
1240           for (i=0; i<nzB; i++) {
1241             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1242             else break;
1243           }
1244           imark = i;
1245         }
1246         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1247       for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1248       }
1249     } else {
1250       if (idx) *idx = 0;
1251       if (v)   *v   = 0;
1252     }
1253   }
1254   *nz  = nztot;
1255   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1256   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1262 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1263 {
1264   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1265 
1266   PetscFunctionBegin;
1267   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1268   baij->getrowactive = PETSC_FALSE;
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1274 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1275 {
1276   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1277   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1278 
1279   PetscFunctionBegin;
1280   aA->getrow_utriangular = PETSC_TRUE;
1281   PetscFunctionReturn(0);
1282 }
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1285 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1286 {
1287   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1288   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1289 
1290   PetscFunctionBegin;
1291   aA->getrow_utriangular = PETSC_FALSE;
1292   PetscFunctionReturn(0);
1293 }
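/*
   MatGetRow() on an SBAIJ matrix must be bracketed by the two routines above,
   which flag that the caller accepts rows drawn from the stored upper
   triangle.  A minimal caller-side sketch (assumed usage; error checking
   omitted):

       MatGetRowUpperTriangular(mat);
       MatGetRow(mat,row,&nz,&cols,&vals);
       ...  use cols[] and vals[]  ...
       MatRestoreRow(mat,row,&nz,&cols,&vals);
       MatRestoreRowUpperTriangular(mat);
*/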
1294 
1295 #undef __FUNCT__
1296 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1297 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1298 {
1299   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1300   PetscErrorCode ierr;
1301 
1302   PetscFunctionBegin;
1303   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1304   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1310 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1311 {
1312   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1317   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
1322    Input: isrow       - distributed (parallel),
1323           iscol_local - locally owned (seq)
1324 */
1325 #undef __FUNCT__
1326 #define __FUNCT__ "ISEqual_private"
1327 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1328 {
1329   PetscErrorCode ierr;
1330   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1331   const PetscInt *ptr1,*ptr2;
1332 
1333   PetscFunctionBegin;
1334   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1335   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1336   if (sz1 > sz2) {
1337     *flg = PETSC_FALSE;
1338     PetscFunctionReturn(0);
1339   }
1340 
1341   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1342   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1343 
1344   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1345   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1346   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1347   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1348   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1349   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1350 
1351   nmatch=0;
1352   k     = 0;
1353   for (i=0; i<sz1; i++){
1354     for (j=k; j<sz2; j++){
1355       if (a1[i] == a2[j]) {
1356         k = j; nmatch++;
1357         break;
1358       }
1359     }
1360   }
1361   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1362   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1363   ierr = PetscFree(a1);CHKERRQ(ierr);
1364   ierr = PetscFree(a2);CHKERRQ(ierr);
1365   if (nmatch < sz1) {
1366     *flg = PETSC_FALSE;
1367   } else {
1368     *flg = PETSC_TRUE;
1369   }
1370   PetscFunctionReturn(0);
1371 }
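
/*
   Worked example (added comment): with isrow = {3,7} on this process and
   iscol_local = {1,3,5,7}, the sorted copies match every isrow entry, so nmatch == sz1
   and *flg is PETSC_TRUE; isrow = {3,8} would leave nmatch == 1 and yield PETSC_FALSE.
*/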
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1375 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1376 {
1377   PetscErrorCode ierr;
1378   IS             iscol_local;
1379   PetscInt       csize;
1380   PetscBool      isequal;
1381 
1382   PetscFunctionBegin;
1383   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1384   if (call == MAT_REUSE_MATRIX) {
1385     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1386     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1387   } else {
1388     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1389     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1390     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1391   }
1392 
1393   /* now call MatGetSubMatrix_MPIBAIJ_Private() */
1394   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1395   if (call == MAT_INITIAL_MATRIX) {
1396     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1397     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1398   }
1399   PetscFunctionReturn(0);
1400 }
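
/*
   Usage sketch (added comment; nlocal and first are placeholders): since the symmetric
   format requires iscol to equal isrow, one extracts a principal submatrix:

     IS  is;
     Mat sub;

     ierr = ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&is);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... subsequent extractions may pass MAT_REUSE_MATRIX with the same sub ...
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
     ierr = ISDestroy(&is);CHKERRQ(ierr);
*/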
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1404 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1405 {
1406   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1407   PetscErrorCode ierr;
1408 
1409   PetscFunctionBegin;
1410   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1411   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1417 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1418 {
1419   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1420   Mat            A  = a->A,B = a->B;
1421   PetscErrorCode ierr;
1422   PetscReal      isend[5],irecv[5];
1423 
1424   PetscFunctionBegin;
1425   info->block_size = (PetscReal)matin->rmap->bs;
1426 
1427   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1428 
1429   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1430   isend[3] = info->memory;  isend[4] = info->mallocs;
1431 
1432   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1433 
1434   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1435   isend[3] += info->memory;  isend[4] += info->mallocs;
1436   if (flag == MAT_LOCAL) {
1437     info->nz_used      = isend[0];
1438     info->nz_allocated = isend[1];
1439     info->nz_unneeded  = isend[2];
1440     info->memory       = isend[3];
1441     info->mallocs      = isend[4];
1442   } else if (flag == MAT_GLOBAL_MAX) {
1443     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1444 
1445     info->nz_used      = irecv[0];
1446     info->nz_allocated = irecv[1];
1447     info->nz_unneeded  = irecv[2];
1448     info->memory       = irecv[3];
1449     info->mallocs      = irecv[4];
1450   } else if (flag == MAT_GLOBAL_SUM) {
1451     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1452 
1453     info->nz_used      = irecv[0];
1454     info->nz_allocated = irecv[1];
1455     info->nz_unneeded  = irecv[2];
1456     info->memory       = irecv[3];
1457     info->mallocs      = irecv[4];
1458   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1459   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1460   info->fill_ratio_needed = 0;
1461   info->factor_mallocs    = 0;
1462   PetscFunctionReturn(0);
1463 }
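
/*
   Usage sketch (added comment): after assembly, MatGetInfo() reports how well the
   preallocation matched the actual nonzero pattern; the fields are PetscLogDoubles:

     MatInfo info;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz_used %g nz_allocated %g mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/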
1464 
1465 #undef __FUNCT__
1466 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1467 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1468 {
1469   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1470   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1471   PetscErrorCode ierr;
1472 
1473   PetscFunctionBegin;
1474   switch (op) {
1475   case MAT_NEW_NONZERO_LOCATIONS:
1476   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1477   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1478   case MAT_KEEP_NONZERO_PATTERN:
1479   case MAT_NEW_NONZERO_LOCATION_ERR:
1480     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1481     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1482     break;
1483   case MAT_ROW_ORIENTED:
1484     a->roworiented = flg;
1485 
1486     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1487     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1488     break;
1489   case MAT_NEW_DIAGONALS:
1490     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1491     break;
1492   case MAT_IGNORE_OFF_PROC_ENTRIES:
1493     a->donotstash = flg;
1494     break;
1495   case MAT_USE_HASH_TABLE:
1496     a->ht_flag = flg;
1497     break;
1498   case MAT_HERMITIAN:
1499     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1500     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1501 
1502     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1503     break;
1504   case MAT_SPD:
1505     A->spd_set = PETSC_TRUE;
1506     A->spd     = flg;
1507     if (flg) {
1508       A->symmetric                  = PETSC_TRUE;
1509       A->structurally_symmetric     = PETSC_TRUE;
1510       A->symmetric_set              = PETSC_TRUE;
1511       A->structurally_symmetric_set = PETSC_TRUE;
1512     }
1513     break;
1514   case MAT_SYMMETRIC:
1515     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1516     break;
1517   case MAT_STRUCTURALLY_SYMMETRIC:
1518     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1519     break;
1520   case MAT_SYMMETRY_ETERNAL:
1521     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1522     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1523     break;
1524   case MAT_IGNORE_LOWER_TRIANGULAR:
1525     aA->ignore_ltriangular = flg;
1526     break;
1527   case MAT_ERROR_LOWER_TRIANGULAR:
1528     aA->ignore_ltriangular = flg;
1529     break;
1530   case MAT_GETROW_UPPERTRIANGULAR:
1531     aA->getrow_utriangular = flg;
1532     break;
1533   default:
1534     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1535   }
1536   PetscFunctionReturn(0);
1537 }
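
/*
   Usage sketch (added comment): a common use of the options above is to drop, rather
   than error on, lower triangular entries generated by an element-by-element assembly:

     ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
*/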
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1541 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1542 {
1543   PetscErrorCode ierr;
1544 
1545   PetscFunctionBegin;
1546   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1547     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1548   }
1549   PetscFunctionReturn(0);
1550 }
1551 
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1554 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1555 {
1556   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1557   Mat            a     = baij->A, b=baij->B;
1558   PetscErrorCode ierr;
1559   PetscInt       nv,m,n;
1560   PetscBool      flg;
1561 
1562   PetscFunctionBegin;
1563   if (ll != rr) {
1564     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1565     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1566   }
1567   if (!ll) PetscFunctionReturn(0);
1568 
1569   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1570   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %D %D must be the same",m,n);
1571 
1572   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1573   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1574 
1575   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1576 
1577   /* left diagonalscale the off-diagonal part */
1578   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1579 
1580   /* scale the diagonal part */
1581   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1582 
1583   /* right diagonalscale the off-diagonal part */
1584   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1586   PetscFunctionReturn(0);
1587 }
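
/*
   Usage sketch (added comment): symmetric scaling B = D*A*D must pass the same vector
   on both sides; d is assumed to hold the per-row scaling factors:

     ierr = MatDiagonalScale(A,d,d);CHKERRQ(ierr);
*/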
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1591 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1592 {
1593   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1594   PetscErrorCode ierr;
1595 
1596   PetscFunctionBegin;
1597   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1602 
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatEqual_MPISBAIJ"
1605 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1606 {
1607   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1608   Mat            a,b,c,d;
1609   PetscBool      flg;
1610   PetscErrorCode ierr;
1611 
1612   PetscFunctionBegin;
1613   a = matA->A; b = matA->B;
1614   c = matB->A; d = matB->B;
1615 
1616   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1617   if (flg) {
1618     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1619   }
1620   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 #undef __FUNCT__
1625 #define __FUNCT__ "MatCopy_MPISBAIJ"
1626 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1627 {
1628   PetscErrorCode ierr;
1629   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1630   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1631 
1632   PetscFunctionBegin;
1633   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1634   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1635     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1636     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1637     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1638   } else {
1639     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1640     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1641   }
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 #undef __FUNCT__
1646 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1647 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1648 {
1649   PetscErrorCode ierr;
1650 
1651   PetscFunctionBegin;
1652   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1658 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1659 {
1660   PetscErrorCode ierr;
1661   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1662   PetscBLASInt   bnz,one=1;
1663   Mat_SeqSBAIJ   *xa,*ya;
1664   Mat_SeqBAIJ    *xb,*yb;
1665 
1666   PetscFunctionBegin;
1667   if (str == SAME_NONZERO_PATTERN) {
1668     PetscScalar alpha = a;
1669     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1670     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1671     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1672     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1673     xb   = (Mat_SeqBAIJ*)xx->B->data;
1674     yb   = (Mat_SeqBAIJ*)yy->B->data;
1675     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1676     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1677     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1678   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
1679     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1680     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1681     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1682   } else {
1683     Mat      B;
1684     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1685     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1686     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1687     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1688     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1689     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1690     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1691     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1692     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1693     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1694     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1695     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1696     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1697     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1698     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1699     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1700     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1701     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1702     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1703     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1704   }
1705   PetscFunctionReturn(0);
1706 }
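
/*
   Usage sketch (added comment): the MatStructure flag selects the branch above --
   SAME_NONZERO_PATTERN takes the BLAS axpy fast path, SUBSET_NONZERO_PATTERN the
   row-by-row basic path, and anything else rebuilds Y with merged preallocation:

     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/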
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1710 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1711 {
1712   PetscErrorCode ierr;
1713   PetscInt       i;
1714   PetscBool      flg,sorted;
1715 
1716   PetscFunctionBegin;
1717   for (i = 0; i < n; i++) {
1718     ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr);
1719     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i);
1720     ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr);
1721     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i);
1722   }
1723   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1724   for (i=0; i<n; i++) {
1725     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1726     if (!flg) { /* *B[i] is non-symmetric, set flag */
1727       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1728     }
1729   }
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatShift_MPISBAIJ"
1735 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1736 {
1737   PetscErrorCode ierr;
1738   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1739   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1740 
1741   PetscFunctionBegin;
1742   if (!Y->preallocated) {
1743     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1744   } else if (!aij->nz) {
1745     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1746   }
1747   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1748   PetscFunctionReturn(0);
1749 }
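
/*
   Usage sketch (added comment): shift the diagonal, Y <- Y + lambda*I, e.g. to
   regularize a nearly singular operator:

     ierr = MatShift(Y,lambda);CHKERRQ(ierr);
*/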
1750 
1751 /* -------------------------------------------------------------------*/
1752 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1753                                        MatGetRow_MPISBAIJ,
1754                                        MatRestoreRow_MPISBAIJ,
1755                                        MatMult_MPISBAIJ,
1756                                /*  4*/ MatMultAdd_MPISBAIJ,
1757                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1758                                        MatMultAdd_MPISBAIJ,
1759                                        0,
1760                                        0,
1761                                        0,
1762                                /* 10*/ 0,
1763                                        0,
1764                                        0,
1765                                        MatSOR_MPISBAIJ,
1766                                        MatTranspose_MPISBAIJ,
1767                                /* 15*/ MatGetInfo_MPISBAIJ,
1768                                        MatEqual_MPISBAIJ,
1769                                        MatGetDiagonal_MPISBAIJ,
1770                                        MatDiagonalScale_MPISBAIJ,
1771                                        MatNorm_MPISBAIJ,
1772                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1773                                        MatAssemblyEnd_MPISBAIJ,
1774                                        MatSetOption_MPISBAIJ,
1775                                        MatZeroEntries_MPISBAIJ,
1776                                /* 24*/ 0,
1777                                        0,
1778                                        0,
1779                                        0,
1780                                        0,
1781                                /* 29*/ MatSetUp_MPISBAIJ,
1782                                        0,
1783                                        0,
1784                                        0,
1785                                        0,
1786                                /* 34*/ MatDuplicate_MPISBAIJ,
1787                                        0,
1788                                        0,
1789                                        0,
1790                                        0,
1791                                /* 39*/ MatAXPY_MPISBAIJ,
1792                                        MatGetSubMatrices_MPISBAIJ,
1793                                        MatIncreaseOverlap_MPISBAIJ,
1794                                        MatGetValues_MPISBAIJ,
1795                                        MatCopy_MPISBAIJ,
1796                                /* 44*/ 0,
1797                                        MatScale_MPISBAIJ,
1798                                        MatShift_MPISBAIJ,
1799                                        0,
1800                                        0,
1801                                /* 49*/ 0,
1802                                        0,
1803                                        0,
1804                                        0,
1805                                        0,
1806                                /* 54*/ 0,
1807                                        0,
1808                                        MatSetUnfactored_MPISBAIJ,
1809                                        0,
1810                                        MatSetValuesBlocked_MPISBAIJ,
1811                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1812                                        0,
1813                                        0,
1814                                        0,
1815                                        0,
1816                                /* 64*/ 0,
1817                                        0,
1818                                        0,
1819                                        0,
1820                                        0,
1821                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1822                                        0,
1823                                        0,
1824                                        0,
1825                                        0,
1826                                /* 74*/ 0,
1827                                        0,
1828                                        0,
1829                                        0,
1830                                        0,
1831                                /* 79*/ 0,
1832                                        0,
1833                                        0,
1834                                        0,
1835                                        MatLoad_MPISBAIJ,
1836                                /* 84*/ 0,
1837                                        0,
1838                                        0,
1839                                        0,
1840                                        0,
1841                                /* 89*/ 0,
1842                                        0,
1843                                        0,
1844                                        0,
1845                                        0,
1846                                /* 94*/ 0,
1847                                        0,
1848                                        0,
1849                                        0,
1850                                        0,
1851                                /* 99*/ 0,
1852                                        0,
1853                                        0,
1854                                        0,
1855                                        0,
1856                                /*104*/ 0,
1857                                        MatRealPart_MPISBAIJ,
1858                                        MatImaginaryPart_MPISBAIJ,
1859                                        MatGetRowUpperTriangular_MPISBAIJ,
1860                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1861                                /*109*/ 0,
1862                                        0,
1863                                        0,
1864                                        0,
1865                                        0,
1866                                /*114*/ 0,
1867                                        0,
1868                                        0,
1869                                        0,
1870                                        0,
1871                                /*119*/ 0,
1872                                        0,
1873                                        0,
1874                                        0,
1875                                        0,
1876                                /*124*/ 0,
1877                                        0,
1878                                        0,
1879                                        0,
1880                                        0,
1881                                /*129*/ 0,
1882                                        0,
1883                                        0,
1884                                        0,
1885                                        0,
1886                                /*134*/ 0,
1887                                        0,
1888                                        0,
1889                                        0,
1890                                        0,
1891                                /*139*/ 0,
1892                                        0,
1893                                        0,
1894                                        0,
1895                                        0,
1896                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1897 };
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1901 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1902 {
1903   PetscFunctionBegin;
1904   *a = ((Mat_MPISBAIJ*)A->data)->A;
1905   PetscFunctionReturn(0);
1906 }
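
/*
   Usage sketch (added comment): the returned block is borrowed (no new reference is
   taken), so the caller must not destroy it:

     Mat Ad;

     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
     ... Ad is the local diagonal MATSEQSBAIJ block of A ...
*/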
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1910 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1911 {
1912   Mat_MPISBAIJ   *b;
1913   PetscErrorCode ierr;
1914   PetscInt       i,mbs,Mbs;
1915 
1916   PetscFunctionBegin;
1917   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1918   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1919   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1920   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1921 
1922   b   = (Mat_MPISBAIJ*)B->data;
1923   mbs = B->rmap->n/bs;
1924   Mbs = B->rmap->N/bs;
1925   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1926 
1927   B->rmap->bs = bs;
1928   b->bs2      = bs*bs;
1929   b->mbs      = mbs;
1930   b->Mbs      = Mbs;
1931   b->nbs      = B->cmap->n/bs;
1932   b->Nbs      = B->cmap->N/bs;
1933 
1934   for (i=0; i<=b->size; i++) {
1935     b->rangebs[i] = B->rmap->range[i]/bs;
1936   }
1937   b->rstartbs = B->rmap->rstart/bs;
1938   b->rendbs   = B->rmap->rend/bs;
1939 
1940   b->cstartbs = B->cmap->rstart/bs;
1941   b->cendbs   = B->cmap->rend/bs;
1942 
1943   if (!B->preallocated) {
1944     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1945     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1946     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1947     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1948     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1949     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1950     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1951     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1952     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1953   }
1954 
1955   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1956   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1957 
1958   B->preallocated = PETSC_TRUE;
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1964 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1965 {
1966   PetscInt       m,rstart,cstart,cend;
1967   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1968   const PetscInt *JJ    =0;
1969   PetscScalar    *values=0;
1970   PetscErrorCode ierr;
1971 
1972   PetscFunctionBegin;
1973   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1974   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1975   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1976   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1977   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1978   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1979   m      = B->rmap->n/bs;
1980   rstart = B->rmap->rstart/bs;
1981   cstart = B->cmap->rstart/bs;
1982   cend   = B->cmap->rend/bs;
1983 
1984   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1985   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1986   for (i=0; i<m; i++) {
1987     nz = ii[i+1] - ii[i];
1988     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1989     nz_max = PetscMax(nz_max,nz);
1990     JJ     = jj + ii[i];
1991     for (j=0; j<nz; j++) {
1992       if (*JJ >= cstart) break;
1993       JJ++;
1994     }
1995     d = 0;
1996     for (; j<nz; j++) {
1997       if (*JJ++ >= cend) break;
1998       d++;
1999     }
2000     d_nnz[i] = d;
2001     o_nnz[i] = nz - d;
2002   }
2003   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2004   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2005 
2006   values = (PetscScalar*)V;
2007   if (!values) {
2008     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2009     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2010   }
2011   for (i=0; i<m; i++) {
2012     PetscInt          row    = i + rstart;
2013     PetscInt          ncols  = ii[i+1] - ii[i];
2014     const PetscInt    *icols = jj + ii[i];
2015     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2016     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2017   }
2018 
2019   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2020   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2021   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2022   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2023   PetscFunctionReturn(0);
2024 }
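
/*
   Illustration (added comment, hypothetical data): for bs = 2 and two locally owned
   block rows whose upper-triangular block columns are {0,2} and {1} (with rstart = 0),
   the block-CSR input would be

     ii[] = {0,2,3};    block row i uses entries ii[i] .. ii[i+1]-1
     jj[] = {0,2,1};    global block column indices, sorted within each row
     V    = NULL;       or bs*bs*ii[m] scalars, one bs-by-bs block per jj entry
*/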
2025 
2026 /*MC
2027    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2028    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2029    the matrix is stored.
2030 
2031   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2032   can call MatSetOption(Mat, MAT_HERMITIAN);
2033 
2034    Options Database Keys:
2035 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2036 
2037   Level: beginner
2038 
2039 .seealso: MatCreateMPISBAIJ
2040 M*/
2041 
2042 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatCreate_MPISBAIJ"
2046 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2047 {
2048   Mat_MPISBAIJ   *b;
2049   PetscErrorCode ierr;
2050   PetscBool      flg = PETSC_FALSE;
2051 
2052   PetscFunctionBegin;
2053   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2054   B->data = (void*)b;
2055   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2056 
2057   B->ops->destroy = MatDestroy_MPISBAIJ;
2058   B->ops->view    = MatView_MPISBAIJ;
2059   B->assembled    = PETSC_FALSE;
2060   B->insertmode   = NOT_SET_VALUES;
2061 
2062   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2063   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2064 
2065   /* build local table of row and column ownerships */
2066   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2067 
2068   /* build cache for off array entries formed */
2069   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2070 
2071   b->donotstash  = PETSC_FALSE;
2072   b->colmap      = NULL;
2073   b->garray      = NULL;
2074   b->roworiented = PETSC_TRUE;
2075 
2076   /* stuff used in block assembly */
2077   b->barray = 0;
2078 
2079   /* stuff used for matrix vector multiply */
2080   b->lvec    = 0;
2081   b->Mvctx   = 0;
2082   b->slvec0  = 0;
2083   b->slvec0b = 0;
2084   b->slvec1  = 0;
2085   b->slvec1a = 0;
2086   b->slvec1b = 0;
2087   b->sMvctx  = 0;
2088 
2089   /* stuff for MatGetRow() */
2090   b->rowindices   = 0;
2091   b->rowvalues    = 0;
2092   b->getrowactive = PETSC_FALSE;
2093 
2094   /* hash table stuff */
2095   b->ht           = 0;
2096   b->hd           = 0;
2097   b->ht_size      = 0;
2098   b->ht_flag      = PETSC_FALSE;
2099   b->ht_fact      = 0;
2100   b->ht_total_ct  = 0;
2101   b->ht_insert_ct = 0;
2102 
2103   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
2104   b->ijonly = PETSC_FALSE;
2105 
2106   b->in_loc = 0;
2107   b->v_loc  = 0;
2108   b->n_loc  = 0;
2109 
2110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2114   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
2116 #if defined(PETSC_HAVE_ELEMENTAL)
2117   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2118 #endif
2119 
2120   B->symmetric                  = PETSC_TRUE;
2121   B->structurally_symmetric     = PETSC_TRUE;
2122   B->symmetric_set              = PETSC_TRUE;
2123   B->structurally_symmetric_set = PETSC_TRUE;
2124 
2125   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2126   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2127   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2128   if (flg) {
2129     PetscReal fact = 1.39;
2130     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2131     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2132     if (fact <= 1.0) fact = 1.39;
2133     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2134     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2135   }
2136   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 /*MC
2141    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2142 
2143    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2144    and MATMPISBAIJ otherwise.
2145 
2146    Options Database Keys:
2147 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2148 
2149   Level: beginner
2150 
2151 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2152 M*/
2153 
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
2156 /*@C
2157    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2158    the user should preallocate the matrix storage by setting the parameters
2159    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2160    performance can be increased by more than a factor of 50.
2161 
2162    Collective on Mat
2163 
2164    Input Parameters:
2165 +  B - the matrix
2166 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2167           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2168 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2169            submatrix  (same for all local rows)
2170 .  d_nnz - array containing the number of block nonzeros in the various block rows
2171            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2172            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2173            for the diagonal entry and set a value even if it is zero.
2174 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2175            submatrix (same for all local rows).
2176 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2177            off-diagonal portion of the local submatrix that is right of the diagonal
2178            (possibly different for each block row) or NULL.
2179 
2180 
2181    Options Database Keys:
2182 .   -mat_no_unroll - uses code that does not unroll the loops in the
2183                      block calculations (much slower)
2184 .   -mat_block_size - size of the blocks to use
2185 
2186    Notes:
2187 
2188    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2189    then it must be used on all processors that share the object for that argument.
2190 
2191    If the *_nnz parameter is given then the *_nz parameter is ignored
2192 
2193    Storage Information:
2194    For a square global matrix we define each processor's diagonal portion
2195    to be its local rows and the corresponding columns (a square submatrix);
2196    each processor's off-diagonal portion encompasses the remainder of the
2197    local matrix (a rectangular submatrix).
2198 
2199    The user can specify preallocated storage for the diagonal part of
2200    the local submatrix with either d_nz or d_nnz (not both).  Set
2201    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2202    memory allocation.  Likewise, specify preallocated storage for the
2203    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2204 
2205    You can call MatGetInfo() to get information on how effective the preallocation was;
2206    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2207    You can also run with the option -info and look for messages with the string
2208    malloc in them to see if additional memory allocation was needed.
2209 
2210    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2211    the figure below we depict these three local rows and all columns (0-11).
2212 
2213 .vb
2214            0 1 2 3 4 5 6 7 8 9 10 11
2215           --------------------------
2216    row 3  |. . . d d d o o o o  o  o
2217    row 4  |. . . d d d o o o o  o  o
2218    row 5  |. . . d d d o o o o  o  o
2219           --------------------------
2220 .ve
2221 
2222    Thus, any entries in the d locations are stored in the d (diagonal)
2223    submatrix, and any entries in the o locations are stored in the
2224    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2225    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2226 
2227    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2228    plus the diagonal part of the d matrix,
2229    and o_nz should indicate the number of block nonzeros per row in the o matrix
2230 
2231    In general, for PDE problems in which most nonzeros are near the diagonal,
2232    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2233    or you will get TERRIBLE performance; see the users' manual chapter on
2234    matrices.
2235 
2236    Level: intermediate
2237 
2238 .keywords: matrix, block, aij, compressed row, sparse, parallel
2239 
2240 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2241 @*/
2242 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2243 {
2244   PetscErrorCode ierr;
2245 
2246   PetscFunctionBegin;
2247   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2248   PetscValidType(B,1);
2249   PetscValidLogicalCollectiveInt(B,bs,2);
2250   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2251   PetscFunctionReturn(0);
2252 }
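
/*
   Usage sketch (added comment; m, M, bs and the nonzero counts are placeholders): the
   recommended MatCreate()/MatSetType()/MatXXXXSetPreallocation() sequence:

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
     ... MatSetValuesBlocked(), MatAssemblyBegin()/MatAssemblyEnd(), use A ...
*/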
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatCreateSBAIJ"
2256 /*@C
2257    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2258    (block compressed row).  For good matrix assembly performance
2259    the user should preallocate the matrix storage by setting the parameters
2260    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2261    performance can be increased by more than a factor of 50.
2262 
2263    Collective on MPI_Comm
2264 
2265    Input Parameters:
2266 +  comm - MPI communicator
2267 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2268           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2269 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2270            This value should be the same as the local size used in creating the
2271            y vector for the matrix-vector product y = Ax.
2272 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2273            This value should be the same as the local size used in creating the
2274            x vector for the matrix-vector product y = Ax.
2275 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2276 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2277 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2278            submatrix  (same for all local rows)
2279 .  d_nnz - array containing the number of block nonzeros in the various block rows
2280            in the upper triangular portion of the in diagonal portion of the local
2281            (possibly different for each block block row) or NULL.
2282            If you plan to factor the matrix you must leave room for the diagonal entry and
2283            set its value even if it is zero.
2284 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2285            submatrix (same for all local rows).
2286 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2287            off-diagonal portion of the local submatrix (possibly different for
2288            each block row) or NULL.
2289 
2290    Output Parameter:
2291 .  A - the matrix
2292 
2293    Options Database Keys:
2294 .   -mat_no_unroll - uses code that does not unroll the loops in the
2295                      block calculations (much slower)
2296 .   -mat_block_size - size of the blocks to use
2297 .   -mat_mpi - use the parallel matrix data structures even on one processor
2298                (defaults to using SeqSBAIJ format on one processor)
2299 
2300    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2301    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2302    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2303 
2304    Notes:
2305    The number of rows and columns must be divisible by blocksize.
2306    This matrix type does not support complex Hermitian operations.
2307 
2308    The user MUST specify either the local or global matrix dimensions
2309    (possibly both).
2310 
2311    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2312    then it must be used on all processors that share the object for that argument.
2313 
2314    If the *_nnz parameter is given then the *_nz parameter is ignored
2315 
2316    Storage Information:
2317    For a square global matrix we define each processor's diagonal portion
2318    to be its local rows and the corresponding columns (a square submatrix);
2319    each processor's off-diagonal portion encompasses the remainder of the
2320    local matrix (a rectangular submatrix).
2321 
2322    The user can specify preallocated storage for the diagonal part of
2323    the local submatrix with either d_nz or d_nnz (not both).  Set
2324    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2325    memory allocation.  Likewise, specify preallocated storage for the
2326    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2327 
2328    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2329    the figure below we depict these three local rows and all columns (0-11).
2330 
2331 .vb
2332            0 1 2 3 4 5 6 7 8 9 10 11
2333           --------------------------
2334    row 3  |. . . d d d o o o o  o  o
2335    row 4  |. . . d d d o o o o  o  o
2336    row 5  |. . . d d d o o o o  o  o
2337           --------------------------
2338 .ve
2339 
2340    Thus, any entries in the d locations are stored in the d (diagonal)
2341    submatrix, and any entries in the o locations are stored in the
2342    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2343    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2344 
2345    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2346    plus the diagonal part of the d matrix,
2347    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2348    In general, for PDE problems in which most nonzeros are near the diagonal,
2349    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2350    or you will get TERRIBLE performance; see the users' manual chapter on
2351    matrices.
2352 
2353    Level: intermediate
2354 
2355 .keywords: matrix, block, aij, compressed row, sparse, parallel
2356 
2357 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2358 @*/
2359 
2360 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2361 {
2362   PetscErrorCode ierr;
2363   PetscMPIInt    size;
2364 
2365   PetscFunctionBegin;
2366   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2367   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2368   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2369   if (size > 1) {
2370     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2371     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2372   } else {
2373     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2374     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2375   }
2376   PetscFunctionReturn(0);
2377 }
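
/*
   Usage sketch (added comment; M and the nonzero estimates are placeholders): create a
   distributed symmetric matrix with 3x3 blocks, letting PETSc split the rows:

     Mat A;

     ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,3,PETSC_DECIDE,PETSC_DECIDE,M,M,
                           5,NULL,2,NULL,&A);CHKERRQ(ierr);
*/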
2378 
2379 
2380 #undef __FUNCT__
2381 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2382 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2383 {
2384   Mat            mat;
2385   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2386   PetscErrorCode ierr;
2387   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2388   PetscScalar    *array;
2389 
2390   PetscFunctionBegin;
2391   *newmat = 0;
2392 
2393   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2394   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2395   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2396   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2397   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2398   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2399 
2400   mat->factortype   = matin->factortype;
2401   mat->preallocated = PETSC_TRUE;
2402   mat->assembled    = PETSC_TRUE;
2403   mat->insertmode   = NOT_SET_VALUES;
2404 
2405   a      = (Mat_MPISBAIJ*)mat->data;
2406   a->bs2 = oldmat->bs2;
2407   a->mbs = oldmat->mbs;
2408   a->nbs = oldmat->nbs;
2409   a->Mbs = oldmat->Mbs;
2410   a->Nbs = oldmat->Nbs;
2411 
2412 
2413   a->size         = oldmat->size;
2414   a->rank         = oldmat->rank;
2415   a->donotstash   = oldmat->donotstash;
2416   a->roworiented  = oldmat->roworiented;
2417   a->rowindices   = 0;
2418   a->rowvalues    = 0;
2419   a->getrowactive = PETSC_FALSE;
2420   a->barray       = 0;
2421   a->rstartbs     = oldmat->rstartbs;
2422   a->rendbs       = oldmat->rendbs;
2423   a->cstartbs     = oldmat->cstartbs;
2424   a->cendbs       = oldmat->cendbs;
2425 
2426   /* hash table stuff */
2427   a->ht           = 0;
2428   a->hd           = 0;
2429   a->ht_size      = 0;
2430   a->ht_flag      = oldmat->ht_flag;
2431   a->ht_fact      = oldmat->ht_fact;
2432   a->ht_total_ct  = 0;
2433   a->ht_insert_ct = 0;
2434 
2435   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2436   if (oldmat->colmap) {
2437 #if defined(PETSC_USE_CTABLE)
2438     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2439 #else
2440     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2441     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2442     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2443 #endif
2444   } else a->colmap = 0;
2445 
2446   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2447     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2448     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2449     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2450   } else a->garray = 0;
2451 
2452   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2453   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2454   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2455   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2456   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2457 
2458   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2459   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2460   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2461   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2462 
2463   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2464   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2465   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2466   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2467   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2468   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2469   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2470   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2471   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2472   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2473   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2474   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2475   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2476 
2477   /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not yet written; instead simply take a reference to the old scatter (the lazy trick): */
2478   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2479   a->sMvctx = oldmat->sMvctx;
2480   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2481 
2482   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2483   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2484   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2485   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2486   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2487   *newmat = mat;
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 #undef __FUNCT__
2492 #define __FUNCT__ "MatLoad_MPISBAIJ"
2493 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2494 {
2495   PetscErrorCode ierr;
2496   PetscInt       i,nz,j,rstart,rend;
2497   PetscScalar    *vals,*buf;
2498   MPI_Comm       comm;
2499   MPI_Status     status;
2500   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2501   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2502   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2503   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2504   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2505   PetscInt       dcount,kmax,k,nzcount,tmp;
2506   int            fd;
2507 
2508   PetscFunctionBegin;
2509   /* force binary viewer to load .info file if it has not yet done so */
2510   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2511   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2512   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2513   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2514   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2515   if (bs < 0) bs = 1;
2516 
2517   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2518   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2519   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2520   if (!rank) {
2521     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2522     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2523     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2524   }
2525 
2526   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2527   M    = header[1];
2528   N    = header[2];
2529 
2530   /* If global sizes are set, check if they are consistent with that given in the file */
2531   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2532   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2533 
2534   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2535 
2536   /*
2537      This code adds extra rows to make sure the number of rows is
2538      divisible by the blocksize
2539   */
2540   Mbs        = M/bs;
2541   extra_rows = bs - M + bs*(Mbs);
2542   if (extra_rows == bs) extra_rows = 0;
2543   else                  Mbs++;
2544   if (extra_rows && !rank) {
2545     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2546   }
2547 
2548   /* determine ownership of all rows */
2549   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2550     mbs = Mbs/size + ((Mbs % size) > rank);
2551     m   = mbs*bs;
2552   } else { /* User Set */
2553     m   = newmat->rmap->n;
2554     mbs = m/bs;
2555   }
2556   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2557   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2558   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2559   rowners[0] = 0;
2560   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2561   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2562   rstart = rowners[rank];
2563   rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }
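
    /*
       The padded rows added above are each given a single entry in column
       M+i here, and a matching numerical value of 1.0 further below, so each
       extra row is just a 1.0 on the diagonal and the padded matrix stays
       nonsingular.
    */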

    /* read in the column indices for all other processors (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the column indices for the last processor */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) nz += locrowlens[i];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file: message length does not match the expected number of nonzeros");
  }
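
  /*
     The mask/masked1/masked2 arrays below implement a sparse counting trick:
     mask[] flags which block columns have already been seen in the current
     block row, while masked1[]/masked2[] record exactly which flags were set
     so that only those entries, rather than all Mbs of them, need to be
     cleared before the next block row is processed.
  */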

  /* loop over local rows, determining number of off-diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the values for the other processors (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file: message length does not match the expected number of nonzeros");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Not Collective on Mat, each process can have a different hash factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/
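
/*
   A sketch of the intended use of the hash-table factor (illustrative only;
   the manual page above is disabled, the option name is taken from it rather
   than from a verified code path, and the executable name ./app is
   hypothetical):

       mpiexec -n 4 ./app -mat_type mpisbaij -mat_use_hash_table 0.15

   Presumably a larger factor yields a larger hash table, trading memory for
   fewer collisions during matrix assembly.
*/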

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Returning row indices is not supported; send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
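  /*
     Since only the upper triangle is stored, an off-diagonal entry of B in
     local row i and global column j also stands for the transposed entry in
     row j, which is owned by a higher-ranked process.  Each process therefore
     accumulates column maxima of B into work[] and ships the relevant slices
     to those higher-ranked processes below, where they are folded into the
     local row maxima.
  */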

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;             /* local col index */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;           /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send column maxima to their owner processes */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* fold the received maxima into the local row maxima */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ"
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
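
  /*
     For the symmetric sweep the global matrix is viewed as A = A_d + B + B^T,
     where A_d is the local diagonal block and B the stored upper-triangular
     off-process part.  Each outer iteration assembles the modified right-hand
     side bb1 = bb - B^T*x - B*x through the merged slvec0/slvec1 scatter and
     then applies a local symmetric sweep with A_d.
  */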

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* lower triangular part: slvec0b = B^T*xx (negated below together with slvec0a) */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = slvec1a + B*slvec1b = bb - B^T*x - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;
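
    /*
       Eisenstat's trick: for A = L + D + L^T the preconditioned operator can
       be applied with roughly half the multiplies by combining triangular
       sweeps with a diagonal scaling, scale = (omega-2)/omega.  Below, a
       local backward sweep produces xx, slvec1a is set to bb + scale*D*xx,
       the off-process part of A is folded in through the slvec scatter, and
       a final forward sweep yields the correction xx1 that is added to xx.
    */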

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for the same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* slvec1a = bb + scale*D*xx; the diagonal-block product must land in
         slvec1a, which the off-diagonal multadd and the local sweep below consume */
      ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
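
  /*
     This variant performs the same local symmetric sweep as MatSOR_MPISBAIJ
     above, but communicates through the conventional Mvctx/lvec scatter pair
     (one forward and one reverse scatter per iteration) instead of the merged
     slvec0/slvec1 scatter.
  */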

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given); for square matrices n is almost always m
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i array holds the offsets into the local j array.
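
   Example:
     A sketch with bs = 1 and two processes, each owning two rows of a global
     4x4 tridiagonal symmetric matrix (assuming the CSR arrays carry each
     row's upper-triangular part, consistent with the symmetric SBAIJ
     storage; the values are illustrative):
.vb
     process 0 (rows 0,1):  i = {0,2,4}  j = {0,1,1,2}  a = {2,-1,2,-1}
     process 1 (rows 2,3):  i = {0,2,3}  j = {2,3,3}    a = {2,-1,2}
.ve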

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (a parallel symmetric block format of PETSc).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted within each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
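  /*
     Two phases: for MAT_INITIAL_MATRIX the block-row ownership and the
     diagonal/off-diagonal preallocation of the stacked parallel matrix are
     computed first (symbolic phase); in either case the rows of the
     sequential matrix are then copied into the parallel matrix (numeric
     phase).
  */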
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt       *dnz,*onz,sum,mbs,Nbs;
    PetscInt       *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns != global block columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;
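
    /*
       Example (illustrative only): with per-process mbs = {2,3,2} the
       inclusive scan returns {2,5,7}, and subtracting the local mbs gives
       the block-row starts rstart = {0,2,5}.
    */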

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}