xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision e37c518b3c178882b922d1d3faeb3ee252cb498a)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
37   { \
38  \
39     brow = row/bs;  \
40     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
41     rmax = aimax[brow]; nrow = ailen[brow]; \
42     bcol = col/bs; \
43     ridx = row % bs; cidx = col % bs; \
44     low  = 0; high = nrow; \
45     while (high-low > 3) { \
46       t = (low+high)/2; \
47       if (rp[t] > bcol) high = t; \
48       else              low  = t; \
49     } \
50     for (_i=low; _i<high; _i++) { \
51       if (rp[_i] > bcol) break; \
52       if (rp[_i] == bcol) { \
53         bap = ap + bs2*_i + bs*cidx + ridx; \
54         if (addv == ADD_VALUES) *bap += value;  \
55         else                    *bap  = value;  \
56         goto a_noinsert; \
57       } \
58     } \
59     if (a->nonew == 1) goto a_noinsert; \
60     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
61     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
62     N = nrow++ - 1;  \
63     /* shift up all the later entries in this row */ \
64     for (ii=N; ii>=_i; ii--) { \
65       rp[ii+1] = rp[ii]; \
66       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
67     } \
68     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
69     rp[_i]                      = bcol;  \
70     ap[bs2*_i + bs*cidx + ridx] = value;  \
71     A->nonzerostate++;\
72 a_noinsert:; \
73     ailen[brow] = nrow; \
74   }
75 
76 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
77   { \
78     brow = row/bs;  \
79     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
80     rmax = bimax[brow]; nrow = bilen[brow]; \
81     bcol = col/bs; \
82     ridx = row % bs; cidx = col % bs; \
83     low  = 0; high = nrow; \
84     while (high-low > 3) { \
85       t = (low+high)/2; \
86       if (rp[t] > bcol) high = t; \
87       else              low  = t; \
88     } \
89     for (_i=low; _i<high; _i++) { \
90       if (rp[_i] > bcol) break; \
91       if (rp[_i] == bcol) { \
92         bap = ap + bs2*_i + bs*cidx + ridx; \
93         if (addv == ADD_VALUES) *bap += value;  \
94         else                    *bap  = value;  \
95         goto b_noinsert; \
96       } \
97     } \
98     if (b->nonew == 1) goto b_noinsert; \
99     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
100     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
101     N = nrow++ - 1;  \
102     /* shift up all the later entries in this row */ \
103     for (ii=N; ii>=_i; ii--) { \
104       rp[ii+1] = rp[ii]; \
105       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
106     } \
107     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
108     rp[_i]                      = bcol;  \
109     ap[bs2*_i + bs*cidx + ridx] = value;  \
110     B->nonzerostate++;\
111 b_noinsert:; \
112     bilen[brow] = nrow; \
113   }
114 
115 /* Only add/insert a(i,j) with i<=j (blocks).
116    Any a(i,j) with i>j input by user is ingored.
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPISBAIJ"
120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
123   MatScalar      value;
124   PetscBool      roworiented = baij->roworiented;
125   PetscErrorCode ierr;
126   PetscInt       i,j,row,col;
127   PetscInt       rstart_orig=mat->rmap->rstart;
128   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
129   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
130 
131   /* Some Variables required in the macro */
132   Mat          A     = baij->A;
133   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
134   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
135   MatScalar    *aa   =a->a;
136 
137   Mat         B     = baij->B;
138   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
139   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
140   MatScalar   *ba   =b->a;
141 
142   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
143   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
144   MatScalar *ap,*bap;
145 
146   /* for stash */
147   PetscInt  n_loc, *in_loc = NULL;
148   MatScalar *v_loc = NULL;
149 
150   PetscFunctionBegin;
151   if (!baij->donotstash) {
152     if (n > baij->n_loc) {
153       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
154       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
155       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
156       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
157 
158       baij->n_loc = n;
159     }
160     in_loc = baij->in_loc;
161     v_loc  = baij->v_loc;
162   }
163 
164   for (i=0; i<m; i++) {
165     if (im[i] < 0) continue;
166 #if defined(PETSC_USE_DEBUG)
167     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
168 #endif
169     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
170       row = im[i] - rstart_orig;              /* local row index */
171       for (j=0; j<n; j++) {
172         if (im[i]/bs > in[j]/bs) {
173           if (a->ignore_ltriangular) {
174             continue;    /* ignore lower triangular blocks */
175           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
176         }
177         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
178           col  = in[j] - cstart_orig;         /* local col index */
179           brow = row/bs; bcol = col/bs;
180           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
181           if (roworiented) value = v[i*n+j];
182           else             value = v[i+j*m];
183           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
184           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
185         } else if (in[j] < 0) continue;
186 #if defined(PETSC_USE_DEBUG)
187         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
188 #endif
189         else {  /* off-diag entry (B) */
190           if (mat->was_assembled) {
191             if (!baij->colmap) {
192               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
193             }
194 #if defined(PETSC_USE_CTABLE)
195             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
196             col  = col - 1;
197 #else
198             col = baij->colmap[in[j]/bs] - 1;
199 #endif
200             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
201               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
202               col  =  in[j];
203               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
204               B    = baij->B;
205               b    = (Mat_SeqBAIJ*)(B)->data;
206               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
207               ba   = b->a;
208             } else col += in[j]%bs;
209           } else col = in[j];
210           if (roworiented) value = v[i*n+j];
211           else             value = v[i+j*m];
212           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
213           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
214         }
215       }
216     } else {  /* off processor entry */
217       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
218       if (!baij->donotstash) {
219         mat->assembled = PETSC_FALSE;
220         n_loc          = 0;
221         for (j=0; j<n; j++) {
222           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
223           in_loc[n_loc] = in[j];
224           if (roworiented) {
225             v_loc[n_loc] = v[i*n+j];
226           } else {
227             v_loc[n_loc] = v[j*m+i];
228           }
229           n_loc++;
230         }
231         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
241 {
242   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
243   PetscErrorCode    ierr;
244   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
245   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
246   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
247   PetscBool         roworiented=a->roworiented;
248   const PetscScalar *value     = v;
249   MatScalar         *ap,*aa = a->a,*bap;
250 
251   PetscFunctionBegin;
252   if (col < row) {
253     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
254     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
255   }
256   rp   = aj + ai[row];
257   ap   = aa + bs2*ai[row];
258   rmax = imax[row];
259   nrow = ailen[row];
260   value = v;
261   low   = 0;
262   high  = nrow;
263 
264   while (high-low > 7) {
265     t = (low+high)/2;
266     if (rp[t] > col) high = t;
267     else             low  = t;
268   }
269   for (i=low; i<high; i++) {
270     if (rp[i] > col) break;
271     if (rp[i] == col) {
272       bap = ap +  bs2*i;
273       if (roworiented) {
274         if (is == ADD_VALUES) {
275           for (ii=0; ii<bs; ii++) {
276             for (jj=ii; jj<bs2; jj+=bs) {
277               bap[jj] += *value++;
278             }
279           }
280         } else {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=ii; jj<bs2; jj+=bs) {
283               bap[jj] = *value++;
284             }
285           }
286         }
287       } else {
288         if (is == ADD_VALUES) {
289           for (ii=0; ii<bs; ii++) {
290             for (jj=0; jj<bs; jj++) {
291               *bap++ += *value++;
292             }
293           }
294         } else {
295           for (ii=0; ii<bs; ii++) {
296             for (jj=0; jj<bs; jj++) {
297               *bap++  = *value++;
298             }
299           }
300         }
301       }
302       goto noinsert2;
303     }
304   }
305   if (nonew == 1) goto noinsert2;
306   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
307   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
308   N = nrow++ - 1; high++;
309   /* shift up all the later entries in this row */
310   for (ii=N; ii>=i; ii--) {
311     rp[ii+1] = rp[ii];
312     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
313   }
314   if (N >= i) {
315     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
316   }
317   rp[i] = col;
318   bap   = ap +  bs2*i;
319   if (roworiented) {
320     for (ii=0; ii<bs; ii++) {
321       for (jj=ii; jj<bs2; jj+=bs) {
322         bap[jj] = *value++;
323       }
324     }
325   } else {
326     for (ii=0; ii<bs; ii++) {
327       for (jj=0; jj<bs; jj++) {
328         *bap++ = *value++;
329       }
330     }
331   }
332   noinsert2:;
333   ailen[row] = nrow;
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
339 /*
340    This routine is exactly duplicated in mpibaij.c
341 */
342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
343 {
344   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
345   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
346   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
347   PetscErrorCode    ierr;
348   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
349   PetscBool         roworiented=a->roworiented;
350   const PetscScalar *value     = v;
351   MatScalar         *ap,*aa = a->a,*bap;
352 
353   PetscFunctionBegin;
354   rp   = aj + ai[row];
355   ap   = aa + bs2*ai[row];
356   rmax = imax[row];
357   nrow = ailen[row];
358   low  = 0;
359   high = nrow;
360   value = v;
361   while (high-low > 7) {
362     t = (low+high)/2;
363     if (rp[t] > col) high = t;
364     else             low  = t;
365   }
366   for (i=low; i<high; i++) {
367     if (rp[i] > col) break;
368     if (rp[i] == col) {
369       bap = ap +  bs2*i;
370       if (roworiented) {
371         if (is == ADD_VALUES) {
372           for (ii=0; ii<bs; ii++) {
373             for (jj=ii; jj<bs2; jj+=bs) {
374               bap[jj] += *value++;
375             }
376           }
377         } else {
378           for (ii=0; ii<bs; ii++) {
379             for (jj=ii; jj<bs2; jj+=bs) {
380               bap[jj] = *value++;
381             }
382           }
383         }
384       } else {
385         if (is == ADD_VALUES) {
386           for (ii=0; ii<bs; ii++,value+=bs) {
387             for (jj=0; jj<bs; jj++) {
388               bap[jj] += value[jj];
389             }
390             bap += bs;
391           }
392         } else {
393           for (ii=0; ii<bs; ii++,value+=bs) {
394             for (jj=0; jj<bs; jj++) {
395               bap[jj]  = value[jj];
396             }
397             bap += bs;
398           }
399         }
400       }
401       goto noinsert2;
402     }
403   }
404   if (nonew == 1) goto noinsert2;
405   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
406   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
407   N = nrow++ - 1; high++;
408   /* shift up all the later entries in this row */
409   for (ii=N; ii>=i; ii--) {
410     rp[ii+1] = rp[ii];
411     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
412   }
413   if (N >= i) {
414     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
415   }
416   rp[i] = col;
417   bap   = ap +  bs2*i;
418   if (roworiented) {
419     for (ii=0; ii<bs; ii++) {
420       for (jj=ii; jj<bs2; jj+=bs) {
421         bap[jj] = *value++;
422       }
423     }
424   } else {
425     for (ii=0; ii<bs; ii++) {
426       for (jj=0; jj<bs; jj++) {
427         *bap++ = *value++;
428       }
429     }
430   }
431   noinsert2:;
432   ailen[row] = nrow;
433   PetscFunctionReturn(0);
434 }
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
438 /*
439     This routine could be optimized by removing the need for the block copy below and passing stride information
440   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
441 */
442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
443 {
444   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
445   const MatScalar *value;
446   MatScalar       *barray     =baij->barray;
447   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
448   PetscErrorCode  ierr;
449   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
450   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
451   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
452 
453   PetscFunctionBegin;
454   if (!barray) {
455     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
456     baij->barray = barray;
457   }
458 
459   if (roworiented) {
460     stepval = (n-1)*bs;
461   } else {
462     stepval = (m-1)*bs;
463   }
464   for (i=0; i<m; i++) {
465     if (im[i] < 0) continue;
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
468 #endif
469     if (im[i] >= rstart && im[i] < rend) {
470       row = im[i] - rstart;
471       for (j=0; j<n; j++) {
472         if (im[i] > in[j]) {
473           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
474           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
475         }
476         /* If NumCol = 1 then a copy is not required */
477         if ((roworiented) && (n == 1)) {
478           barray = (MatScalar*) v + i*bs2;
479         } else if ((!roworiented) && (m == 1)) {
480           barray = (MatScalar*) v + j*bs2;
481         } else { /* Here a copy is required */
482           if (roworiented) {
483             value = v + i*(stepval+bs)*bs + j*bs;
484           } else {
485             value = v + j*(stepval+bs)*bs + i*bs;
486           }
487           for (ii=0; ii<bs; ii++,value+=stepval) {
488             for (jj=0; jj<bs; jj++) {
489               *barray++ = *value++;
490             }
491           }
492           barray -=bs2;
493         }
494 
495         if (in[j] >= cstart && in[j] < cend) {
496           col  = in[j] - cstart;
497           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
498         } else if (in[j] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
501 #endif
502         else {
503           if (mat->was_assembled) {
504             if (!baij->colmap) {
505               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
506             }
507 
508 #if defined(PETSC_USE_DEBUG)
509 #if defined(PETSC_USE_CTABLE)
510             { PetscInt data;
511               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
512               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
513             }
514 #else
515             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
516 #endif
517 #endif
518 #if defined(PETSC_USE_CTABLE)
519             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520             col  = (col - 1)/bs;
521 #else
522             col = (baij->colmap[in[j]] - 1)/bs;
523 #endif
524             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
526               col  = in[j];
527             }
528           } else col = in[j];
529           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
530         }
531       }
532     } else {
533       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
534       if (!baij->donotstash) {
535         if (roworiented) {
536           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
537         } else {
538           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
539         }
540       }
541     }
542   }
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
551   PetscErrorCode ierr;
552   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
553   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
554 
555   PetscFunctionBegin;
556   for (i=0; i<m; i++) {
557     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
558     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
559     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
560       row = idxm[i] - bsrstart;
561       for (j=0; j<n; j++) {
562         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
563         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
564         if (idxn[j] >= bscstart && idxn[j] < bscend) {
565           col  = idxn[j] - bscstart;
566           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567         } else {
568           if (!baij->colmap) {
569             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
570           }
571 #if defined(PETSC_USE_CTABLE)
572           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
573           data--;
574 #else
575           data = baij->colmap[idxn[j]/bs]-1;
576 #endif
577           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
578           else {
579             col  = data + idxn[j]%bs;
580             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581           }
582         }
583       }
584     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatNorm_MPISBAIJ"
591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
592 {
593   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscReal      sum[2],*lnorm2;
596 
597   PetscFunctionBegin;
598   if (baij->size == 1) {
599     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
600   } else {
601     if (type == NORM_FROBENIUS) {
602       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
603       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
605       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
606       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
607       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
608       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
609       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
610     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
611       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
612       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
613       PetscReal    *rsum,*rsum2,vabs;
614       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
615       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
616       MatScalar    *v;
617 
618       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
619       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
620       /* Amat */
621       v = amat->a; jj = amat->j;
622       for (brow=0; brow<mbs; brow++) {
623         grow = bs*(rstart + brow);
624         nz   = amat->i[brow+1] - amat->i[brow];
625         for (bcol=0; bcol<nz; bcol++) {
626           gcol = bs*(rstart + *jj); jj++;
627           for (col=0; col<bs; col++) {
628             for (row=0; row<bs; row++) {
629               vabs            = PetscAbsScalar(*v); v++;
630               rsum[gcol+col] += vabs;
631               /* non-diagonal block */
632               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
633             }
634           }
635         }
636         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
637       }
638       /* Bmat */
639       v = bmat->a; jj = bmat->j;
640       for (brow=0; brow<mbs; brow++) {
641         grow = bs*(rstart + brow);
642         nz = bmat->i[brow+1] - bmat->i[brow];
643         for (bcol=0; bcol<nz; bcol++) {
644           gcol = bs*garray[*jj]; jj++;
645           for (col=0; col<bs; col++) {
646             for (row=0; row<bs; row++) {
647               vabs            = PetscAbsScalar(*v); v++;
648               rsum[gcol+col] += vabs;
649               rsum[grow+row] += vabs;
650             }
651           }
652         }
653         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
654       }
655       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
656       *norm = 0.0;
657       for (col=0; col<mat->cmap->N; col++) {
658         if (rsum2[col] > *norm) *norm = rsum2[col];
659       }
660       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
661     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
662   }
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
668 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode ierr;
672   PetscInt       nstash,reallocs;
673 
674   PetscFunctionBegin;
675   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
676 
677   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
678   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
679   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
680   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
681   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
682   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
688 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
689 {
690   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
691   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
692   PetscErrorCode ierr;
693   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
694   PetscInt       *row,*col;
695   PetscBool      other_disassembled;
696   PetscMPIInt    n;
697   PetscBool      r1,r2,r3;
698   MatScalar      *val;
699 
700   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
701   PetscFunctionBegin;
702   if (!baij->donotstash &&  !mat->nooffprocentries) {
703     while (1) {
704       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
705       if (!flg) break;
706 
707       for (i=0; i<n;) {
708         /* Now identify the consecutive vals belonging to the same row */
709         for (j=i,rstart=row[j]; j<n; j++) {
710           if (row[j] != rstart) break;
711         }
712         if (j < n) ncols = j-i;
713         else       ncols = n-i;
714         /* Now assemble all these values with a single function call */
715         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
716         i    = j;
717       }
718     }
719     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
720     /* Now process the block-stash. Since the values are stashed column-oriented,
721        set the roworiented flag to column oriented, and after MatSetValues()
722        restore the original flags */
723     r1 = baij->roworiented;
724     r2 = a->roworiented;
725     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
726 
727     baij->roworiented = PETSC_FALSE;
728     a->roworiented    = PETSC_FALSE;
729 
730     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
731     while (1) {
732       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
733       if (!flg) break;
734 
735       for (i=0; i<n;) {
736         /* Now identify the consecutive vals belonging to the same row */
737         for (j=i,rstart=row[j]; j<n; j++) {
738           if (row[j] != rstart) break;
739         }
740         if (j < n) ncols = j-i;
741         else       ncols = n-i;
742         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
743         i    = j;
744       }
745     }
746     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
747 
748     baij->roworiented = r1;
749     a->roworiented    = r2;
750 
751     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
752   }
753 
754   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
755   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
756 
757   /* determine if any processor has disassembled, if so we must
758      also disassemble ourselfs, in order that we may reassemble. */
759   /*
760      if nonzero structure of submatrix B cannot change then we know that
761      no processor disassembled thus we can skip this stuff
762   */
763   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
764     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
765     if (mat->was_assembled && !other_disassembled) {
766       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
767     }
768   }
769 
770   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
771     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
772   }
773   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
774   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
775 
776   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
777 
778   baij->rowvalues = 0;
779 
780   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
781   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
782     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
783     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
784   }
785   PetscFunctionReturn(0);
786 }
787 
788 extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
789 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
790 #include <petscdraw.h>
791 #undef __FUNCT__
792 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
793 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
794 {
795   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
796   PetscErrorCode    ierr;
797   PetscInt          bs   = mat->rmap->bs;
798   PetscMPIInt       rank = baij->rank;
799   PetscBool         iascii,isdraw;
800   PetscViewer       sviewer;
801   PetscViewerFormat format;
802 
803   PetscFunctionBegin;
804   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
805   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
806   if (iascii) {
807     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
808     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
809       MatInfo info;
810       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
811       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
812       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
813       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
814       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
815       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
816       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
817       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
818       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
819       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
820       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
821       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
822       PetscFunctionReturn(0);
823     } else if (format == PETSC_VIEWER_ASCII_INFO) {
824       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
825       PetscFunctionReturn(0);
826     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
827       PetscFunctionReturn(0);
828     }
829   }
830 
831   if (isdraw) {
832     PetscDraw draw;
833     PetscBool isnull;
834     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
835     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
836     if (isnull) PetscFunctionReturn(0);
837   }
838 
839   {
840     /* assemble the entire matrix onto first processor. */
841     Mat          A;
842     Mat_SeqSBAIJ *Aloc;
843     Mat_SeqBAIJ  *Bloc;
844     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
845     MatScalar    *a;
846     const char   *matname;
847 
848     /* Should this be the same type as mat? */
849     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
850     if (!rank) {
851       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
852     } else {
853       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
854     }
855     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
856     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
857     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
858     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
859 
860     /* copy over the A part */
861     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
862     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
863     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
864 
865     for (i=0; i<mbs; i++) {
866       rvals[0] = bs*(baij->rstartbs + i);
867       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
868       for (j=ai[i]; j<ai[i+1]; j++) {
869         col = (baij->cstartbs+aj[j])*bs;
870         for (k=0; k<bs; k++) {
871           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
872           col++;
873           a += bs;
874         }
875       }
876     }
877     /* copy over the B part */
878     Bloc = (Mat_SeqBAIJ*)baij->B->data;
879     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
880     for (i=0; i<mbs; i++) {
881 
882       rvals[0] = bs*(baij->rstartbs + i);
883       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
884       for (j=ai[i]; j<ai[i+1]; j++) {
885         col = baij->garray[aj[j]]*bs;
886         for (k=0; k<bs; k++) {
887           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
888           col++;
889           a += bs;
890         }
891       }
892     }
893     ierr = PetscFree(rvals);CHKERRQ(ierr);
894     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
895     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
896     /*
897        Everyone has to call to draw the matrix since the graphics waits are
898        synchronized across all processors that share the PetscDraw object
899     */
900     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
901     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
902     if (!rank) {
903       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
904       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
905     }
906     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
907     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
908     ierr = MatDestroy(&A);CHKERRQ(ierr);
909   }
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "MatView_MPISBAIJ"
915 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
916 {
917   PetscErrorCode ierr;
918   PetscBool      iascii,isdraw,issocket,isbinary;
919 
920   PetscFunctionBegin;
921   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
922   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
923   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
924   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
925   if (iascii || isdraw || issocket || isbinary) {
926     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
927   }
928   PetscFunctionReturn(0);
929 }
930 
931 #undef __FUNCT__
932 #define __FUNCT__ "MatDestroy_MPISBAIJ"
933 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
934 {
935   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
936   PetscErrorCode ierr;
937 
938   PetscFunctionBegin;
939 #if defined(PETSC_USE_LOG)
940   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
941 #endif
942   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
943   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
944   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
945   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
946 #if defined(PETSC_USE_CTABLE)
947   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
948 #else
949   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
950 #endif
951   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
952   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
953   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
954   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
955   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
956   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
957   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
958   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
959   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
960   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
961   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
962   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
963   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
964   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
965   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
966 #if defined(PETSC_USE_REAL_MAT_SINGLE)
967   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
968 #endif
969   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
970   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
971   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
972   ierr = PetscFree(mat->data);CHKERRQ(ierr);
973 
974   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
975   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
976   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
977   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
978   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
979   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
980 #if defined(PETSC_HAVE_ELEMENTAL)
981   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
982 #endif
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
988 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
989 {
990   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
991   PetscErrorCode    ierr;
992   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
993   PetscScalar       *from;
994   const PetscScalar *x;
995 
996   PetscFunctionBegin;
997   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
998   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
999 
1000   /* diagonal part */
1001   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1002   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1003 
1004   /* subdiagonal part */
1005   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1006 
1007   /* copy x into the vec slvec0 */
1008   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1009   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1010 
1011   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1012   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1013   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1014 
1015   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1017   /* supperdiagonal part */
1018   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatMult_MPISBAIJ"
1024 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1025 {
1026   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1027   PetscErrorCode    ierr;
1028   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1029   PetscScalar       *from;
1030   const PetscScalar *x;
1031 
1032   PetscFunctionBegin;
1033   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1034   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1035 
1036   /* diagonal part */
1037   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1038   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1039 
1040   /* subdiagonal part */
1041   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1042 
1043   /* copy x into the vec slvec0 */
1044   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1045   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1046 
1047   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1048   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1049   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1050 
1051   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053   /* supperdiagonal part */
1054   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1060 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1061 {
1062   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1063   PetscErrorCode ierr;
1064   PetscInt       nt;
1065 
1066   PetscFunctionBegin;
1067   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1068   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1069 
1070   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1071   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1072 
1073   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1074   /* do diagonal part */
1075   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1076   /* do supperdiagonal part */
1077   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1078   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1079   /* do subdiagonal part */
1080   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1081   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1082   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1088 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1089 {
1090   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1091   PetscErrorCode    ierr;
1092   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1093   PetscScalar       *from,zero=0.0;
1094   const PetscScalar *x;
1095 
1096   PetscFunctionBegin;
1097   /*
1098   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1099   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1100   */
1101   /* diagonal part */
1102   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1103   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1104 
1105   /* subdiagonal part */
1106   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1107 
1108   /* copy x into the vec slvec0 */
1109   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1110   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1111   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1112   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1113 
1114   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1116   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117 
1118   /* supperdiagonal part */
1119   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1125 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1126 {
1127   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1128   PetscErrorCode ierr;
1129 
1130   PetscFunctionBegin;
1131   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1132   /* do diagonal part */
1133   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1134   /* do supperdiagonal part */
1135   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1136   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1137 
1138   /* do subdiagonal part */
1139   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1140   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 /*
1146   This only works correctly for square matrices where the subblock A->A is the
1147    diagonal block
1148 */
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1151 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1152 {
1153   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1154   PetscErrorCode ierr;
1155 
1156   PetscFunctionBegin;
1157   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1158   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatScale_MPISBAIJ"
1164 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1165 {
1166   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1167   PetscErrorCode ierr;
1168 
1169   PetscFunctionBegin;
1170   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1171   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1177 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1178 {
1179   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1180   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1181   PetscErrorCode ierr;
1182   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1183   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1184   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1185 
1186   PetscFunctionBegin;
1187   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1188   mat->getrowactive = PETSC_TRUE;
1189 
1190   if (!mat->rowvalues && (idx || v)) {
1191     /*
1192         allocate enough space to hold information from the longest row.
1193     */
1194     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1195     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1196     PetscInt     max = 1,mbs = mat->mbs,tmp;
1197     for (i=0; i<mbs; i++) {
1198       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1199       if (max < tmp) max = tmp;
1200     }
1201     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1202   }
1203 
1204   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1205   lrow = row - brstart;  /* local row index */
1206 
1207   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1208   if (!v)   {pvA = 0; pvB = 0;}
1209   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1210   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1211   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1212   nztot = nzA + nzB;
1213 
1214   cmap = mat->garray;
1215   if (v  || idx) {
1216     if (nztot) {
1217       /* Sort by increasing column numbers, assuming A and B already sorted */
1218       PetscInt imark = -1;
1219       if (v) {
1220         *v = v_p = mat->rowvalues;
1221         for (i=0; i<nzB; i++) {
1222           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1223           else break;
1224         }
1225         imark = i;
1226         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1227         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1228       }
1229       if (idx) {
1230         *idx = idx_p = mat->rowindices;
1231         if (imark > -1) {
1232           for (i=0; i<imark; i++) {
1233             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1234           }
1235         } else {
1236           for (i=0; i<nzB; i++) {
1237             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1238             else break;
1239           }
1240           imark = i;
1241         }
1242         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1243         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1244       }
1245     } else {
1246       if (idx) *idx = 0;
1247       if (v)   *v   = 0;
1248     }
1249   }
1250   *nz  = nztot;
1251   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1252   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1258 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1259 {
1260   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1261 
1262   PetscFunctionBegin;
1263   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1264   baij->getrowactive = PETSC_FALSE;
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1270 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1271 {
1272   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1273   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1274 
1275   PetscFunctionBegin;
1276   aA->getrow_utriangular = PETSC_TRUE;
1277   PetscFunctionReturn(0);
1278 }
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1281 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1282 {
1283   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1284   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1285 
1286   PetscFunctionBegin;
1287   aA->getrow_utriangular = PETSC_FALSE;
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1293 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1294 {
1295   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1296   PetscErrorCode ierr;
1297 
1298   PetscFunctionBegin;
1299   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1300   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1306 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1307 {
1308   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1309   PetscErrorCode ierr;
1310 
1311   PetscFunctionBegin;
1312   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1313   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
1318    Input: isrow       - distributed(parallel),
1319           iscol_local - locally owned (seq)
1320 */
1321 #undef __FUNCT__
1322 #define __FUNCT__ "ISEqual_private"
1323 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1324 {
1325   PetscErrorCode ierr;
1326   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1327   const PetscInt *ptr1,*ptr2;
1328 
1329   PetscFunctionBegin;
1330   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1331   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1332   if (sz1 > sz2) {
1333     *flg = PETSC_FALSE;
1334     PetscFunctionReturn(0);
1335   }
1336 
1337   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1338   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1339 
1340   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1341   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1342   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1343   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1344   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1345   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1346 
1347   nmatch=0;
1348   k     = 0;
1349   for (i=0; i<sz1; i++){
1350     for (j=k; j<sz2; j++){
1351       if (a1[i] == a2[j]) {
1352         k = j; nmatch++;
1353         break;
1354       }
1355     }
1356   }
1357   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1358   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1359   ierr = PetscFree(a1);CHKERRQ(ierr);
1360   ierr = PetscFree(a2);CHKERRQ(ierr);
1361   if (nmatch < sz1) {
1362     *flg = PETSC_FALSE;
1363   } else {
1364     *flg = PETSC_TRUE;
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1371 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1372 {
1373   PetscErrorCode ierr;
1374   IS             iscol_local;
1375   PetscInt       csize;
1376   PetscBool      isequal;
1377 
1378   PetscFunctionBegin;
1379   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1380   if (call == MAT_REUSE_MATRIX) {
1381     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1382     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1383   } else {
1384     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1385     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1386     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1387   }
1388 
1389   /* now call MatGetSubMatrix_MPIBAIJ() */
1390   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1391   if (call == MAT_INITIAL_MATRIX) {
1392     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1393     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1394   }
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 #undef __FUNCT__
1399 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1400 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1401 {
1402   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1403   PetscErrorCode ierr;
1404 
1405   PetscFunctionBegin;
1406   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1407   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1413 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1414 {
1415   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1416   Mat            A  = a->A,B = a->B;
1417   PetscErrorCode ierr;
1418   PetscReal      isend[5],irecv[5];
1419 
1420   PetscFunctionBegin;
1421   info->block_size = (PetscReal)matin->rmap->bs;
1422 
1423   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1424 
1425   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1426   isend[3] = info->memory;  isend[4] = info->mallocs;
1427 
1428   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1429 
1430   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1431   isend[3] += info->memory;  isend[4] += info->mallocs;
1432   if (flag == MAT_LOCAL) {
1433     info->nz_used      = isend[0];
1434     info->nz_allocated = isend[1];
1435     info->nz_unneeded  = isend[2];
1436     info->memory       = isend[3];
1437     info->mallocs      = isend[4];
1438   } else if (flag == MAT_GLOBAL_MAX) {
1439     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1440 
1441     info->nz_used      = irecv[0];
1442     info->nz_allocated = irecv[1];
1443     info->nz_unneeded  = irecv[2];
1444     info->memory       = irecv[3];
1445     info->mallocs      = irecv[4];
1446   } else if (flag == MAT_GLOBAL_SUM) {
1447     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1448 
1449     info->nz_used      = irecv[0];
1450     info->nz_allocated = irecv[1];
1451     info->nz_unneeded  = irecv[2];
1452     info->memory       = irecv[3];
1453     info->mallocs      = irecv[4];
1454   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1455   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1456   info->fill_ratio_needed = 0;
1457   info->factor_mallocs    = 0;
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1463 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1464 {
1465   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1466   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1467   PetscErrorCode ierr;
1468 
1469   PetscFunctionBegin;
1470   switch (op) {
1471   case MAT_NEW_NONZERO_LOCATIONS:
1472   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1473   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1474   case MAT_KEEP_NONZERO_PATTERN:
1475   case MAT_NEW_NONZERO_LOCATION_ERR:
1476     MatCheckPreallocated(A,1);
1477     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1478     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1479     break;
1480   case MAT_ROW_ORIENTED:
1481     MatCheckPreallocated(A,1);
1482     a->roworiented = flg;
1483 
1484     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1485     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1486     break;
1487   case MAT_NEW_DIAGONALS:
1488     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1489     break;
1490   case MAT_IGNORE_OFF_PROC_ENTRIES:
1491     a->donotstash = flg;
1492     break;
1493   case MAT_USE_HASH_TABLE:
1494     a->ht_flag = flg;
1495     break;
1496   case MAT_HERMITIAN:
1497     MatCheckPreallocated(A,1);
1498     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1499     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1500 
1501     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1502     break;
1503   case MAT_SPD:
1504     A->spd_set = PETSC_TRUE;
1505     A->spd     = flg;
1506     if (flg) {
1507       A->symmetric                  = PETSC_TRUE;
1508       A->structurally_symmetric     = PETSC_TRUE;
1509       A->symmetric_set              = PETSC_TRUE;
1510       A->structurally_symmetric_set = PETSC_TRUE;
1511     }
1512     break;
1513   case MAT_SYMMETRIC:
1514     MatCheckPreallocated(A,1);
1515     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1516     break;
1517   case MAT_STRUCTURALLY_SYMMETRIC:
1518     MatCheckPreallocated(A,1);
1519     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1520     break;
1521   case MAT_SYMMETRY_ETERNAL:
1522     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1523     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1524     break;
1525   case MAT_IGNORE_LOWER_TRIANGULAR:
1526     aA->ignore_ltriangular = flg;
1527     break;
1528   case MAT_ERROR_LOWER_TRIANGULAR:
1529     aA->ignore_ltriangular = flg;
1530     break;
1531   case MAT_GETROW_UPPERTRIANGULAR:
1532     aA->getrow_utriangular = flg;
1533     break;
1534   default:
1535     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1536   }
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1542 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1543 {
1544   PetscErrorCode ierr;
1545 
1546   PetscFunctionBegin;
1547   if (MAT_INITIAL_MATRIX || *B != A) {
1548     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1549   }
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 #undef __FUNCT__
1554 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1555 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1556 {
1557   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1558   Mat            a     = baij->A, b=baij->B;
1559   PetscErrorCode ierr;
1560   PetscInt       nv,m,n;
1561   PetscBool      flg;
1562 
1563   PetscFunctionBegin;
1564   if (ll != rr) {
1565     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1566     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1567   }
1568   if (!ll) PetscFunctionReturn(0);
1569 
1570   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1571   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1572 
1573   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1574   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1575 
1576   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1577 
1578   /* left diagonalscale the off-diagonal part */
1579   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1580 
1581   /* scale the diagonal part */
1582   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1583 
1584   /* right diagonalscale the off-diagonal part */
1585   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1586   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1592 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1593 {
1594   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1595   PetscErrorCode ierr;
1596 
1597   PetscFunctionBegin;
1598   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1603 
1604 #undef __FUNCT__
1605 #define __FUNCT__ "MatEqual_MPISBAIJ"
1606 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1607 {
1608   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1609   Mat            a,b,c,d;
1610   PetscBool      flg;
1611   PetscErrorCode ierr;
1612 
1613   PetscFunctionBegin;
1614   a = matA->A; b = matA->B;
1615   c = matB->A; d = matB->B;
1616 
1617   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1618   if (flg) {
1619     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1620   }
1621   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 #undef __FUNCT__
1626 #define __FUNCT__ "MatCopy_MPISBAIJ"
1627 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1628 {
1629   PetscErrorCode ierr;
1630   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1631   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1632 
1633   PetscFunctionBegin;
1634   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1635   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1636     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1637     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1638     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1639   } else {
1640     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1641     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1642   }
1643   PetscFunctionReturn(0);
1644 }
1645 
1646 #undef __FUNCT__
1647 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1648 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1649 {
1650   PetscErrorCode ierr;
1651 
1652   PetscFunctionBegin;
1653   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1659 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1660 {
1661   PetscErrorCode ierr;
1662   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1663   PetscBLASInt   bnz,one=1;
1664   Mat_SeqSBAIJ   *xa,*ya;
1665   Mat_SeqBAIJ    *xb,*yb;
1666 
1667   PetscFunctionBegin;
1668   if (str == SAME_NONZERO_PATTERN) {
1669     PetscScalar alpha = a;
1670     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1671     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1672     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1673     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1674     xb   = (Mat_SeqBAIJ*)xx->B->data;
1675     yb   = (Mat_SeqBAIJ*)yy->B->data;
1676     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1677     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1678     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1679   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1680     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1681     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1682     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1683   } else {
1684     Mat      B;
1685     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1686     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1687     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1688     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1689     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1690     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1691     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1692     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1693     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1694     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1695     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1696     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1697     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1698     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1699     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1700     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1701     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1702     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1703     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1704     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1705   }
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1711 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1712 {
1713   PetscErrorCode ierr;
1714   PetscInt       i;
1715   PetscBool      flg;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1719   for (i=0; i<n; i++) {
1720     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1721     if (!flg) {
1722       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1723     }
1724   }
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatShift_MPISBAIJ"
1730 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1731 {
1732   PetscErrorCode ierr;
1733   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1734   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1735 
1736   PetscFunctionBegin;
1737   if (!Y->preallocated) {
1738     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1739   } else if (!aij->nz) {
1740     PetscInt nonew = aij->nonew;
1741     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1742     aij->nonew = nonew;
1743   }
1744   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ"
1750 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1751 {
1752   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1753   PetscErrorCode ierr;
1754 
1755   PetscFunctionBegin;
1756   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1757   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1758   if (d) {
1759     PetscInt rstart;
1760     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1761     *d += rstart/A->rmap->bs;
1762 
1763   }
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 
1768 /* -------------------------------------------------------------------*/
1769 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1770                                        MatGetRow_MPISBAIJ,
1771                                        MatRestoreRow_MPISBAIJ,
1772                                        MatMult_MPISBAIJ,
1773                                /*  4*/ MatMultAdd_MPISBAIJ,
1774                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1775                                        MatMultAdd_MPISBAIJ,
1776                                        0,
1777                                        0,
1778                                        0,
1779                                /* 10*/ 0,
1780                                        0,
1781                                        0,
1782                                        MatSOR_MPISBAIJ,
1783                                        MatTranspose_MPISBAIJ,
1784                                /* 15*/ MatGetInfo_MPISBAIJ,
1785                                        MatEqual_MPISBAIJ,
1786                                        MatGetDiagonal_MPISBAIJ,
1787                                        MatDiagonalScale_MPISBAIJ,
1788                                        MatNorm_MPISBAIJ,
1789                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1790                                        MatAssemblyEnd_MPISBAIJ,
1791                                        MatSetOption_MPISBAIJ,
1792                                        MatZeroEntries_MPISBAIJ,
1793                                /* 24*/ 0,
1794                                        0,
1795                                        0,
1796                                        0,
1797                                        0,
1798                                /* 29*/ MatSetUp_MPISBAIJ,
1799                                        0,
1800                                        0,
1801                                        0,
1802                                        0,
1803                                /* 34*/ MatDuplicate_MPISBAIJ,
1804                                        0,
1805                                        0,
1806                                        0,
1807                                        0,
1808                                /* 39*/ MatAXPY_MPISBAIJ,
1809                                        MatGetSubMatrices_MPISBAIJ,
1810                                        MatIncreaseOverlap_MPISBAIJ,
1811                                        MatGetValues_MPISBAIJ,
1812                                        MatCopy_MPISBAIJ,
1813                                /* 44*/ 0,
1814                                        MatScale_MPISBAIJ,
1815                                        MatShift_MPISBAIJ,
1816                                        0,
1817                                        0,
1818                                /* 49*/ 0,
1819                                        0,
1820                                        0,
1821                                        0,
1822                                        0,
1823                                /* 54*/ 0,
1824                                        0,
1825                                        MatSetUnfactored_MPISBAIJ,
1826                                        0,
1827                                        MatSetValuesBlocked_MPISBAIJ,
1828                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1829                                        0,
1830                                        0,
1831                                        0,
1832                                        0,
1833                                /* 64*/ 0,
1834                                        0,
1835                                        0,
1836                                        0,
1837                                        0,
1838                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1839                                        0,
1840                                        0,
1841                                        0,
1842                                        0,
1843                                /* 74*/ 0,
1844                                        0,
1845                                        0,
1846                                        0,
1847                                        0,
1848                                /* 79*/ 0,
1849                                        0,
1850                                        0,
1851                                        0,
1852                                        MatLoad_MPISBAIJ,
1853                                /* 84*/ 0,
1854                                        0,
1855                                        0,
1856                                        0,
1857                                        0,
1858                                /* 89*/ 0,
1859                                        0,
1860                                        0,
1861                                        0,
1862                                        0,
1863                                /* 94*/ 0,
1864                                        0,
1865                                        0,
1866                                        0,
1867                                        0,
1868                                /* 99*/ 0,
1869                                        0,
1870                                        0,
1871                                        0,
1872                                        0,
1873                                /*104*/ 0,
1874                                        MatRealPart_MPISBAIJ,
1875                                        MatImaginaryPart_MPISBAIJ,
1876                                        MatGetRowUpperTriangular_MPISBAIJ,
1877                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1878                                /*109*/ 0,
1879                                        0,
1880                                        0,
1881                                        0,
1882                                        MatMissingDiagonal_MPISBAIJ,
1883                                /*114*/ 0,
1884                                        0,
1885                                        0,
1886                                        0,
1887                                        0,
1888                                /*119*/ 0,
1889                                        0,
1890                                        0,
1891                                        0,
1892                                        0,
1893                                /*124*/ 0,
1894                                        0,
1895                                        0,
1896                                        0,
1897                                        0,
1898                                /*129*/ 0,
1899                                        0,
1900                                        0,
1901                                        0,
1902                                        0,
1903                                /*134*/ 0,
1904                                        0,
1905                                        0,
1906                                        0,
1907                                        0,
1908                                /*139*/ 0,
1909                                        0,
1910                                        0,
1911                                        0,
1912                                        0,
1913                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1914 };
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1918 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1919 {
1920   PetscFunctionBegin;
1921   *a = ((Mat_MPISBAIJ*)A->data)->A;
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 #undef __FUNCT__
1926 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1927 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1928 {
1929   Mat_MPISBAIJ   *b;
1930   PetscErrorCode ierr;
1931   PetscInt       i,mbs,Mbs;
1932 
1933   PetscFunctionBegin;
1934   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1935   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1936   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1937   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1938 
1939   b   = (Mat_MPISBAIJ*)B->data;
1940   mbs = B->rmap->n/bs;
1941   Mbs = B->rmap->N/bs;
1942   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1943 
1944   B->rmap->bs = bs;
1945   b->bs2      = bs*bs;
1946   b->mbs      = mbs;
1947   b->Mbs      = Mbs;
1948   b->nbs      = B->cmap->n/bs;
1949   b->Nbs      = B->cmap->N/bs;
1950 
1951   for (i=0; i<=b->size; i++) {
1952     b->rangebs[i] = B->rmap->range[i]/bs;
1953   }
1954   b->rstartbs = B->rmap->rstart/bs;
1955   b->rendbs   = B->rmap->rend/bs;
1956 
1957   b->cstartbs = B->cmap->rstart/bs;
1958   b->cendbs   = B->cmap->rend/bs;
1959 
1960   if (!B->preallocated) {
1961     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1962     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1963     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1964     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1965     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1966     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1967     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1968     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1969     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1970   }
1971 
1972   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1973   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1974 
1975   B->preallocated = PETSC_TRUE;
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1981 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1982 {
1983   PetscInt       m,rstart,cstart,cend;
1984   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1985   const PetscInt *JJ    =0;
1986   PetscScalar    *values=0;
1987   PetscErrorCode ierr;
1988 
1989   PetscFunctionBegin;
1990   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1991   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1992   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1993   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1994   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1995   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1996   m      = B->rmap->n/bs;
1997   rstart = B->rmap->rstart/bs;
1998   cstart = B->cmap->rstart/bs;
1999   cend   = B->cmap->rend/bs;
2000 
2001   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2002   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2003   for (i=0; i<m; i++) {
2004     nz = ii[i+1] - ii[i];
2005     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2006     nz_max = PetscMax(nz_max,nz);
2007     JJ     = jj + ii[i];
2008     for (j=0; j<nz; j++) {
2009       if (*JJ >= cstart) break;
2010       JJ++;
2011     }
2012     d = 0;
2013     for (; j<nz; j++) {
2014       if (*JJ++ >= cend) break;
2015       d++;
2016     }
2017     d_nnz[i] = d;
2018     o_nnz[i] = nz - d;
2019   }
2020   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2021   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2022 
2023   values = (PetscScalar*)V;
2024   if (!values) {
2025     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2026     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2027   }
2028   for (i=0; i<m; i++) {
2029     PetscInt          row    = i + rstart;
2030     PetscInt          ncols  = ii[i+1] - ii[i];
2031     const PetscInt    *icols = jj + ii[i];
2032     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2033     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2034   }
2035 
2036   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2037   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2038   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2039   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 /*MC
2044    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2045    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2046    the matrix is stored.
2047 
2048   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2049   can call MatSetOption(Mat, MAT_HERMITIAN);
2050 
2051    Options Database Keys:
2052 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2053 
2054   Level: beginner
2055 
2056 .seealso: MatCreateMPISBAIJ
2057 M*/
2058 
2059 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2060 
2061 #undef __FUNCT__
2062 #define __FUNCT__ "MatCreate_MPISBAIJ"
2063 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2064 {
2065   Mat_MPISBAIJ   *b;
2066   PetscErrorCode ierr;
2067   PetscBool      flg = PETSC_FALSE;
2068 
2069   PetscFunctionBegin;
2070   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2071   B->data = (void*)b;
2072   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2073 
2074   B->ops->destroy = MatDestroy_MPISBAIJ;
2075   B->ops->view    = MatView_MPISBAIJ;
2076   B->assembled    = PETSC_FALSE;
2077   B->insertmode   = NOT_SET_VALUES;
2078 
2079   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2080   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2081 
2082   /* build local table of row and column ownerships */
2083   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2084 
2085   /* build cache for off array entries formed */
2086   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2087 
2088   b->donotstash  = PETSC_FALSE;
2089   b->colmap      = NULL;
2090   b->garray      = NULL;
2091   b->roworiented = PETSC_TRUE;
2092 
2093   /* stuff used in block assembly */
2094   b->barray = 0;
2095 
2096   /* stuff used for matrix vector multiply */
2097   b->lvec    = 0;
2098   b->Mvctx   = 0;
2099   b->slvec0  = 0;
2100   b->slvec0b = 0;
2101   b->slvec1  = 0;
2102   b->slvec1a = 0;
2103   b->slvec1b = 0;
2104   b->sMvctx  = 0;
2105 
2106   /* stuff for MatGetRow() */
2107   b->rowindices   = 0;
2108   b->rowvalues    = 0;
2109   b->getrowactive = PETSC_FALSE;
2110 
2111   /* hash table stuff */
2112   b->ht           = 0;
2113   b->hd           = 0;
2114   b->ht_size      = 0;
2115   b->ht_flag      = PETSC_FALSE;
2116   b->ht_fact      = 0;
2117   b->ht_total_ct  = 0;
2118   b->ht_insert_ct = 0;
2119 
2120   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
2121   b->ijonly = PETSC_FALSE;
2122 
2123   b->in_loc = 0;
2124   b->v_loc  = 0;
2125   b->n_loc  = 0;
2126 
2127   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2128   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2129   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2130   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2131   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2132 #if defined(PETSC_HAVE_ELEMENTAL)
2133   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2134 #endif
2135 
2136   B->symmetric                  = PETSC_TRUE;
2137   B->structurally_symmetric     = PETSC_TRUE;
2138   B->symmetric_set              = PETSC_TRUE;
2139   B->structurally_symmetric_set = PETSC_TRUE;
2140 
2141   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2142   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2143   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2144   if (flg) {
2145     PetscReal fact = 1.39;
2146     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2147     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2148     if (fact <= 1.0) fact = 1.39;
2149     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2150     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2151   }
2152   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 /*MC
2157    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2158 
2159    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2160    and MATMPISBAIJ otherwise.
2161 
2162    Options Database Keys:
2163 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2164 
2165   Level: beginner
2166 
2167 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2168 M*/
2169 
2170 #undef __FUNCT__
2171 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
2172 /*@C
2173    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2174    the user should preallocate the matrix storage by setting the parameters
2175    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2176    performance can be increased by more than a factor of 50.
2177 
2178    Collective on Mat
2179 
2180    Input Parameters:
2181 +  B - the matrix
2182 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2183           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2184 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2185            submatrix  (same for all local rows)
2186 .  d_nnz - array containing the number of block nonzeros in the various block rows
2187            in the upper triangular and diagonal part of the in diagonal portion of the local
2188            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2189            for the diagonal entry and set a value even if it is zero.
2190 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2191            submatrix (same for all local rows).
2192 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2193            off-diagonal portion of the local submatrix that is right of the diagonal
2194            (possibly different for each block row) or NULL.
2195 
2196 
2197    Options Database Keys:
2198 .   -mat_no_unroll - uses code that does not unroll the loops in the
2199                      block calculations (much slower)
2200 .   -mat_block_size - size of the blocks to use
2201 
2202    Notes:
2203 
2204    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2205    than it must be used on all processors that share the object for that argument.
2206 
2207    If the *_nnz parameter is given then the *_nz parameter is ignored
2208 
2209    Storage Information:
2210    For a square global matrix we define each processor's diagonal portion
2211    to be its local rows and the corresponding columns (a square submatrix);
2212    each processor's off-diagonal portion encompasses the remainder of the
2213    local matrix (a rectangular submatrix).
2214 
2215    The user can specify preallocated storage for the diagonal part of
2216    the local submatrix with either d_nz or d_nnz (not both).  Set
2217    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2218    memory allocation.  Likewise, specify preallocated storage for the
2219    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2220 
2221    You can call MatGetInfo() to get information on how effective the preallocation was;
2222    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2223    You can also run with the option -info and look for messages with the string
2224    malloc in them to see if additional memory allocation was needed.
2225 
2226    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2227    the figure below we depict these three local rows and all columns (0-11).
2228 
2229 .vb
2230            0 1 2 3 4 5 6 7 8 9 10 11
2231           --------------------------
2232    row 3  |. . . d d d o o o o  o  o
2233    row 4  |. . . d d d o o o o  o  o
2234    row 5  |. . . d d d o o o o  o  o
2235           --------------------------
2236 .ve
2237 
2238    Thus, any entries in the d locations are stored in the d (diagonal)
2239    submatrix, and any entries in the o locations are stored in the
2240    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2241    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2242 
2243    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2244    plus the diagonal part of the d matrix,
2245    and o_nz should indicate the number of block nonzeros per row in the o matrix
2246 
2247    In general, for PDE problems in which most nonzeros are near the diagonal,
2248    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2249    or you will get TERRIBLE performance; see the users' manual chapter on
2250    matrices.
2251 
2252    Level: intermediate
2253 
2254 .keywords: matrix, block, aij, compressed row, sparse, parallel
2255 
2256 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2257 @*/
2258 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2259 {
2260   PetscErrorCode ierr;
2261 
2262   PetscFunctionBegin;
2263   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2264   PetscValidType(B,1);
2265   PetscValidLogicalCollectiveInt(B,bs,2);
2266   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 #undef __FUNCT__
2271 #define __FUNCT__ "MatCreateSBAIJ"
2272 /*@C
2273    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2274    (block compressed row).  For good matrix assembly performance
2275    the user should preallocate the matrix storage by setting the parameters
2276    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2277    performance can be increased by more than a factor of 50.
2278 
2279    Collective on MPI_Comm
2280 
2281    Input Parameters:
2282 +  comm - MPI communicator
2283 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2284           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2285 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2286            This value should be the same as the local size used in creating the
2287            y vector for the matrix-vector product y = Ax.
2288 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2289            This value should be the same as the local size used in creating the
2290            x vector for the matrix-vector product y = Ax.
2291 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2292 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2293 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2294            submatrix  (same for all local rows)
2295 .  d_nnz - array containing the number of block nonzeros in the various block rows
2296            in the upper triangular portion of the in diagonal portion of the local
2297            (possibly different for each block block row) or NULL.
2298            If you plan to factor the matrix you must leave room for the diagonal entry and
2299            set its value even if it is zero.
2300 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2301            submatrix (same for all local rows).
2302 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2303            off-diagonal portion of the local submatrix (possibly different for
2304            each block row) or NULL.
2305 
2306    Output Parameter:
2307 .  A - the matrix
2308 
2309    Options Database Keys:
2310 .   -mat_no_unroll - uses code that does not unroll the loops in the
2311                      block calculations (much slower)
2312 .   -mat_block_size - size of the blocks to use
2313 .   -mat_mpi - use the parallel matrix data structures even on one processor
2314                (defaults to using SeqBAIJ format on one processor)
2315 
2316    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2317    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2318    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2319 
2320    Notes:
2321    The number of rows and columns must be divisible by blocksize.
2322    This matrix type does not support complex Hermitian operation.
2323 
2324    The user MUST specify either the local or global matrix dimensions
2325    (possibly both).
2326 
2327    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2328    than it must be used on all processors that share the object for that argument.
2329 
2330    If the *_nnz parameter is given then the *_nz parameter is ignored
2331 
2332    Storage Information:
2333    For a square global matrix we define each processor's diagonal portion
2334    to be its local rows and the corresponding columns (a square submatrix);
2335    each processor's off-diagonal portion encompasses the remainder of the
2336    local matrix (a rectangular submatrix).
2337 
2338    The user can specify preallocated storage for the diagonal part of
2339    the local submatrix with either d_nz or d_nnz (not both).  Set
2340    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2341    memory allocation.  Likewise, specify preallocated storage for the
2342    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2343 
2344    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2345    the figure below we depict these three local rows and all columns (0-11).
2346 
2347 .vb
2348            0 1 2 3 4 5 6 7 8 9 10 11
2349           --------------------------
2350    row 3  |. . . d d d o o o o  o  o
2351    row 4  |. . . d d d o o o o  o  o
2352    row 5  |. . . d d d o o o o  o  o
2353           --------------------------
2354 .ve
2355 
2356    Thus, any entries in the d locations are stored in the d (diagonal)
2357    submatrix, and any entries in the o locations are stored in the
2358    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2359    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2360 
2361    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2362    plus the diagonal part of the d matrix,
2363    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2364    In general, for PDE problems in which most nonzeros are near the diagonal,
2365    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2366    or you will get TERRIBLE performance; see the users' manual chapter on
2367    matrices.
2368 
2369    Level: intermediate
2370 
2371 .keywords: matrix, block, aij, compressed row, sparse, parallel
2372 
2373 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2374 @*/
2375 
2376 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2377 {
2378   PetscErrorCode ierr;
2379   PetscMPIInt    size;
2380 
2381   PetscFunctionBegin;
2382   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2383   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2384   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2385   if (size > 1) {
2386     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2387     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2388   } else {
2389     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2390     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2391   }
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2398 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2399 {
2400   Mat            mat;
2401   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2402   PetscErrorCode ierr;
2403   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2404   PetscScalar    *array;
2405 
2406   PetscFunctionBegin;
2407   *newmat = 0;
2408 
2409   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2410   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2411   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2412   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2413   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2414   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2415 
2416   mat->factortype   = matin->factortype;
2417   mat->preallocated = PETSC_TRUE;
2418   mat->assembled    = PETSC_TRUE;
2419   mat->insertmode   = NOT_SET_VALUES;
2420 
2421   a      = (Mat_MPISBAIJ*)mat->data;
2422   a->bs2 = oldmat->bs2;
2423   a->mbs = oldmat->mbs;
2424   a->nbs = oldmat->nbs;
2425   a->Mbs = oldmat->Mbs;
2426   a->Nbs = oldmat->Nbs;
2427 
2428 
2429   a->size         = oldmat->size;
2430   a->rank         = oldmat->rank;
2431   a->donotstash   = oldmat->donotstash;
2432   a->roworiented  = oldmat->roworiented;
2433   a->rowindices   = 0;
2434   a->rowvalues    = 0;
2435   a->getrowactive = PETSC_FALSE;
2436   a->barray       = 0;
2437   a->rstartbs     = oldmat->rstartbs;
2438   a->rendbs       = oldmat->rendbs;
2439   a->cstartbs     = oldmat->cstartbs;
2440   a->cendbs       = oldmat->cendbs;
2441 
2442   /* hash table stuff */
2443   a->ht           = 0;
2444   a->hd           = 0;
2445   a->ht_size      = 0;
2446   a->ht_flag      = oldmat->ht_flag;
2447   a->ht_fact      = oldmat->ht_fact;
2448   a->ht_total_ct  = 0;
2449   a->ht_insert_ct = 0;
2450 
2451   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2452   if (oldmat->colmap) {
2453 #if defined(PETSC_USE_CTABLE)
2454     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2455 #else
2456     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2457     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2458     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2459 #endif
2460   } else a->colmap = 0;
2461 
2462   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2463     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2464     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2465     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2466   } else a->garray = 0;
2467 
2468   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2469   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2470   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2471   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2472   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2473 
2474   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2475   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2476   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2477   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2478 
2479   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2480   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2481   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2482   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2483   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2484   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2485   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2486   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2487   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2488   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2489   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2490   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2491   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2492 
2493   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2494   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2495   a->sMvctx = oldmat->sMvctx;
2496   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2497 
2498   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2499   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2500   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2501   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2502   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2503   *newmat = mat;
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 #undef __FUNCT__
2508 #define __FUNCT__ "MatLoad_MPISBAIJ"
2509 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2510 {
2511   PetscErrorCode ierr;
2512   PetscInt       i,nz,j,rstart,rend;
2513   PetscScalar    *vals,*buf;
2514   MPI_Comm       comm;
2515   MPI_Status     status;
2516   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2517   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2518   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2519   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2520   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2521   PetscInt       dcount,kmax,k,nzcount,tmp;
2522   int            fd;
2523 
2524   PetscFunctionBegin;
2525   /* force binary viewer to load .info file if it has not yet done so */
2526   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2527   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2528   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2529   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2530   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2531   if (bs < 0) bs = 1;
2532 
2533   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2534   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2535   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2536   if (!rank) {
2537     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2538     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2539     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2540   }
2541 
2542   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2543   M    = header[1];
2544   N    = header[2];
2545 
2546   /* If global sizes are set, check if they are consistent with that given in the file */
2547   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2548   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2549 
2550   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2551 
2552   /*
2553      This code adds extra rows to make sure the number of rows is
2554      divisible by the blocksize
2555   */
2556   Mbs        = M/bs;
2557   extra_rows = bs - M + bs*(Mbs);
2558   if (extra_rows == bs) extra_rows = 0;
2559   else                  Mbs++;
2560   if (extra_rows &&!rank) {
2561     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2562   }
2563 
2564   /* determine ownership of all rows */
2565   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2566     mbs = Mbs/size + ((Mbs % size) > rank);
2567     m   = mbs*bs;
2568   } else { /* User Set */
2569     m   = newmat->rmap->n;
2570     mbs = m/bs;
2571   }
2572   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2573   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2574   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2575   rowners[0] = 0;
2576   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2577   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2578   rstart = rowners[rank];
2579   rend   = rowners[rank+1];
2580 
2581   /* distribute row lengths to all processors */
2582   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2583   if (!rank) {
2584     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2585     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2586     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2587     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2588     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2589     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2590     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2591   } else {
2592     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2593   }
2594 
2595   if (!rank) {   /* procs[0] */
2596     /* calculate the number of nonzeros on each processor */
2597     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2598     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2599     for (i=0; i<size; i++) {
2600       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2601         procsnz[i] += rowlengths[j];
2602       }
2603     }
2604     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2605 
2606     /* determine max buffer needed and allocate it */
2607     maxnz = 0;
2608     for (i=0; i<size; i++) {
2609       maxnz = PetscMax(maxnz,procsnz[i]);
2610     }
2611     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2612 
2613     /* read in my part of the matrix column indices  */
2614     nz     = procsnz[0];
2615     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2616     mycols = ibuf;
2617     if (size == 1) nz -= extra_rows;
2618     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2619     if (size == 1) {
2620       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2621     }
2622 
2623     /* read in every ones (except the last) and ship off */
2624     for (i=1; i<size-1; i++) {
2625       nz   = procsnz[i];
2626       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2627       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2628     }
2629     /* read in the stuff for the last proc */
2630     if (size != 1) {
2631       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2632       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2633       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2634       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2635     }
2636     ierr = PetscFree(cols);CHKERRQ(ierr);
2637   } else {  /* procs[i], i>0 */
2638     /* determine buffer space needed for message */
2639     nz = 0;
2640     for (i=0; i<m; i++) nz += locrowlens[i];
2641     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2642     mycols = ibuf;
2643     /* receive message of column indices*/
2644     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2645     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2646     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2647   }
2648 
2649   /* loop over local rows, determining number of off diagonal entries */
2650   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2651   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2652   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2653   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2654   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2655   rowcount = 0;
2656   nzcount  = 0;
2657   for (i=0; i<mbs; i++) {
2658     dcount  = 0;
2659     odcount = 0;
2660     for (j=0; j<bs; j++) {
2661       kmax = locrowlens[rowcount];
2662       for (k=0; k<kmax; k++) {
2663         tmp = mycols[nzcount++]/bs; /* block col. index */
2664         if (!mask[tmp]) {
2665           mask[tmp] = 1;
2666           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2667           else masked1[dcount++] = tmp; /* entry in diag portion */
2668         }
2669       }
2670       rowcount++;
2671     }
2672 
2673     dlens[i]  = dcount;  /* d_nzz[i] */
2674     odlens[i] = odcount; /* o_nzz[i] */
2675 
2676     /* zero out the mask elements we set */
2677     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2678     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2679   }
2680   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2681   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2682   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2683 
2684   if (!rank) {
2685     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2686     /* read in my part of the matrix numerical values  */
2687     nz     = procsnz[0];
2688     vals   = buf;
2689     mycols = ibuf;
2690     if (size == 1) nz -= extra_rows;
2691     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2692     if (size == 1) {
2693       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2694     }
2695 
2696     /* insert into matrix */
2697     jj = rstart*bs;
2698     for (i=0; i<m; i++) {
2699       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2700       mycols += locrowlens[i];
2701       vals   += locrowlens[i];
2702       jj++;
2703     }
2704 
2705     /* read in other processors (except the last one) and ship out */
2706     for (i=1; i<size-1; i++) {
2707       nz   = procsnz[i];
2708       vals = buf;
2709       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2710       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2711     }
2712     /* the last proc */
2713     if (size != 1) {
2714       nz   = procsnz[i] - extra_rows;
2715       vals = buf;
2716       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2717       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2718       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2719     }
2720     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2721 
2722   } else {
2723     /* receive numeric values */
2724     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2725 
2726     /* receive message of values*/
2727     vals   = buf;
2728     mycols = ibuf;
2729     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2730     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2731     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2732 
2733     /* insert into matrix */
2734     jj = rstart*bs;
2735     for (i=0; i<m; i++) {
2736       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2737       mycols += locrowlens[i];
2738       vals   += locrowlens[i];
2739       jj++;
2740     }
2741   }
2742 
2743   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2744   ierr = PetscFree(buf);CHKERRQ(ierr);
2745   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2746   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2747   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2748   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2749   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2750   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2751   PetscFunctionReturn(0);
2752 }
2753 
2754 #undef __FUNCT__
2755 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2756 /*XXXXX@
2757    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2758 
2759    Input Parameters:
2760 .  mat  - the matrix
2761 .  fact - factor
2762 
2763    Not Collective on Mat, each process can have a different hash factor
2764 
2765    Level: advanced
2766 
2767   Notes:
2768    This can also be set by the command line option: -mat_use_hash_table fact
2769 
2770 .keywords: matrix, hashtable, factor, HT
2771 
2772 .seealso: MatSetOption()
2773 @XXXXX*/
2774 
2775 
2776 #undef __FUNCT__
2777 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2778 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2779 {
2780   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2781   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2782   PetscReal      atmp;
2783   PetscReal      *work,*svalues,*rvalues;
2784   PetscErrorCode ierr;
2785   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2786   PetscMPIInt    rank,size;
2787   PetscInt       *rowners_bs,dest,count,source;
2788   PetscScalar    *va;
2789   MatScalar      *ba;
2790   MPI_Status     stat;
2791 
2792   PetscFunctionBegin;
2793   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2794   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2795   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2796 
2797   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2798   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2799 
2800   bs  = A->rmap->bs;
2801   mbs = a->mbs;
2802   Mbs = a->Mbs;
2803   ba  = b->a;
2804   bi  = b->i;
2805   bj  = b->j;
2806 
2807   /* find ownerships */
2808   rowners_bs = A->rmap->range;
2809 
2810   /* each proc creates an array to be distributed */
2811   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2812   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2813 
2814   /* row_max for B */
2815   if (rank != size-1) {
2816     for (i=0; i<mbs; i++) {
2817       ncols = bi[1] - bi[0]; bi++;
2818       brow  = bs*i;
2819       for (j=0; j<ncols; j++) {
2820         bcol = bs*(*bj);
2821         for (kcol=0; kcol<bs; kcol++) {
2822           col  = bcol + kcol;                /* local col index */
2823           col += rowners_bs[rank+1];      /* global col index */
2824           for (krow=0; krow<bs; krow++) {
2825             atmp = PetscAbsScalar(*ba); ba++;
2826             row  = brow + krow;   /* local row index */
2827             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2828             if (work[col] < atmp) work[col] = atmp;
2829           }
2830         }
2831         bj++;
2832       }
2833     }
2834 
2835     /* send values to its owners */
2836     for (dest=rank+1; dest<size; dest++) {
2837       svalues = work + rowners_bs[dest];
2838       count   = rowners_bs[dest+1]-rowners_bs[dest];
2839       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2840     }
2841   }
2842 
2843   /* receive values */
2844   if (rank) {
2845     rvalues = work;
2846     count   = rowners_bs[rank+1]-rowners_bs[rank];
2847     for (source=0; source<rank; source++) {
2848       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2849       /* process values */
2850       for (i=0; i<count; i++) {
2851         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2852       }
2853     }
2854   }
2855 
2856   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2857   ierr = PetscFree(work);CHKERRQ(ierr);
2858   PetscFunctionReturn(0);
2859 }
2860 
2861 #undef __FUNCT__
2862 #define __FUNCT__ "MatSOR_MPISBAIJ"
2863 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2864 {
2865   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2866   PetscErrorCode    ierr;
2867   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2868   PetscScalar       *x,*ptr,*from;
2869   Vec               bb1;
2870   const PetscScalar *b;
2871 
2872   PetscFunctionBegin;
2873   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2874   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2875 
2876   if (flag == SOR_APPLY_UPPER) {
2877     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2878     PetscFunctionReturn(0);
2879   }
2880 
2881   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2882     if (flag & SOR_ZERO_INITIAL_GUESS) {
2883       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2884       its--;
2885     }
2886 
2887     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2888     while (its--) {
2889 
2890       /* lower triangular part: slvec0b = - B^T*xx */
2891       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2892 
2893       /* copy xx into slvec0a */
2894       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2895       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2896       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2897       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2898 
2899       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2900 
2901       /* copy bb into slvec1a */
2902       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2903       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2904       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2905       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2906 
2907       /* set slvec1b = 0 */
2908       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2909 
2910       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2911       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2912       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2913       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2914 
2915       /* upper triangular part: bb1 = bb1 - B*x */
2916       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2917 
2918       /* local diagonal sweep */
2919       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2920     }
2921     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2922   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2923     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2924   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2925     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2926   } else if (flag & SOR_EISENSTAT) {
2927     Vec               xx1;
2928     PetscBool         hasop;
2929     const PetscScalar *diag;
2930     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2931     PetscInt          i,n;
2932 
2933     if (!mat->xx1) {
2934       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2935       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2936     }
2937     xx1 = mat->xx1;
2938     bb1 = mat->bb1;
2939 
2940     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2941 
2942     if (!mat->diag) {
2943       /* this is wrong for same matrix with new nonzero values */
2944       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2945       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2946     }
2947     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2948 
2949     if (hasop) {
2950       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2951       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2952     } else {
2953       /*
2954           These two lines are replaced by code that may be a bit faster for a good compiler
2955       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2956       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2957       */
2958       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2959       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2960       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2961       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2962       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2963       if (omega == 1.0) {
2964         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2965         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2966       } else {
2967         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2968         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2969       }
2970       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2971       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2972       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2973       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2974     }
2975 
2976     /* multiply off-diagonal portion of matrix */
2977     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2978     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2979     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2980     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2981     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2982     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2983     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2984     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2985     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2986     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2987 
2988     /* local sweep */
2989     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2990     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2991   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2992   PetscFunctionReturn(0);
2993 }
2994 
2995 #undef __FUNCT__
2996 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2997 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2998 {
2999   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3000   PetscErrorCode ierr;
3001   Vec            lvec1,bb1;
3002 
3003   PetscFunctionBegin;
3004   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3005   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3006 
3007   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3008     if (flag & SOR_ZERO_INITIAL_GUESS) {
3009       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3010       its--;
3011     }
3012 
3013     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3014     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3015     while (its--) {
3016       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3017 
3018       /* lower diagonal part: bb1 = bb - B^T*xx */
3019       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3020       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3021 
3022       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3023       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3024       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3025 
3026       /* upper diagonal part: bb1 = bb1 - B*x */
3027       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3028       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3029 
3030       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3031 
3032       /* diagonal sweep */
3033       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3034     }
3035     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3036     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3037   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3038   PetscFunctionReturn(0);
3039 }
3040 
3041 #undef __FUNCT__
3042 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
3043 /*@
3044      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3045          CSR format the local rows.
3046 
3047    Collective on MPI_Comm
3048 
3049    Input Parameters:
3050 +  comm - MPI communicator
3051 .  bs - the block size, only a block size of 1 is supported
3052 .  m - number of local rows (Cannot be PETSC_DECIDE)
3053 .  n - This value should be the same as the local size used in creating the
3054        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3055        calculated if N is given) For square matrices n is almost always m.
3056 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3057 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3058 .   i - row indices
3059 .   j - column indices
3060 -   a - matrix values
3061 
3062    Output Parameter:
3063 .   mat - the matrix
3064 
3065    Level: intermediate
3066 
3067    Notes:
3068        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3069      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3070      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3071 
3072        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3073 
3074 .keywords: matrix, aij, compressed row, sparse, parallel
3075 
3076 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3077           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3078 @*/
3079 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3080 {
3081   PetscErrorCode ierr;
3082 
3083 
3084   PetscFunctionBegin;
3085   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3086   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3087   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3088   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3089   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3090   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3091   PetscFunctionReturn(0);
3092 }
3093 
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
3097 /*@C
3098    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3099    (the default parallel PETSc format).
3100 
3101    Collective on MPI_Comm
3102 
3103    Input Parameters:
3104 +  B - the matrix
3105 .  bs - the block size
3106 .  i - the indices into j for the start of each local row (starts with zero)
3107 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3108 -  v - optional values in the matrix
3109 
3110    Level: developer
3111 
3112 .keywords: matrix, aij, compressed row, sparse, parallel
3113 
3114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3115 @*/
3116 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3117 {
3118   PetscErrorCode ierr;
3119 
3120   PetscFunctionBegin;
3121   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3122   PetscFunctionReturn(0);
3123 }
3124 
3125 #undef __FUNCT__
3126 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
3127 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3128 {
3129   PetscErrorCode ierr;
3130   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3131   PetscInt       *indx;
3132   PetscScalar    *values;
3133 
3134   PetscFunctionBegin;
3135   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3136   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3137     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3138     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3139     PetscInt       *bindx,rmax=a->rmax,j;
3140 
3141     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3142     mbs = m/bs; Nbs = N/cbs;
3143     if (n == PETSC_DECIDE) {
3144       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3145     }
3146     /* Check sum(n) = Nbs */
3147     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3148     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3149 
3150     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3151     rstart -= mbs;
3152 
3153     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3154     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3155     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3156     for (i=0; i<mbs; i++) {
3157       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3158       nnz = nnz/bs;
3159       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3160       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3161       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3162     }
3163     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3164     ierr = PetscFree(bindx);CHKERRQ(ierr);
3165 
3166     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3167     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3168     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3169     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3170     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3171     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3172   }
3173 
3174   /* numeric phase */
3175   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3176   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3177 
3178   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3179   for (i=0; i<m; i++) {
3180     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3181     Ii   = i + rstart;
3182     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3183     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3184   }
3185   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3186   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3187   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3188   PetscFunctionReturn(0);
3189 }
3190