xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision bb42e86ab3e4720245217a7cdcd02146569f06da)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
11 {
12   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
17   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
18   PetscFunctionReturn(0);
19 }
20 
21 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
22 {
23   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
28   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
29   PetscFunctionReturn(0);
30 }
31 
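/*
   A minimal caller-side sketch of the store/retrieve pair (hypothetical
   routine, not part of PETSc): MatStoreValues() requires that the nonzero
   pattern be frozen first, and MatShift() below merely stands in for any
   in-place modification of the numerical values.
*/
PETSC_STATIC_INLINE PetscErrorCode MatStoreRetrieve_Sketch(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(mat);CHKERRQ(ierr);    /* stash the current numerical values */
  ierr = MatShift(mat,1.0);CHKERRQ(ierr);      /* modify entries in place */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* restore the stashed values */
  PetscFunctionReturn(0);
}
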
32 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
33   { \
34  \
35     brow = row/bs;  \
36     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
37     rmax = aimax[brow]; nrow = ailen[brow]; \
38     bcol = col/bs; \
39     ridx = row % bs; cidx = col % bs; \
40     low  = 0; high = nrow; \
41     while (high-low > 3) { \
42       t = (low+high)/2; \
43       if (rp[t] > bcol) high = t; \
44       else              low  = t; \
45     } \
46     for (_i=low; _i<high; _i++) { \
47       if (rp[_i] > bcol) break; \
48       if (rp[_i] == bcol) { \
49         bap = ap + bs2*_i + bs*cidx + ridx; \
50         if (addv == ADD_VALUES) *bap += value;  \
51         else                    *bap  = value;  \
52         goto a_noinsert; \
53       } \
54     } \
55     if (a->nonew == 1) goto a_noinsert; \
56     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
57     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
58     N = nrow++ - 1;  \
59     /* shift up all the later entries in this row */ \
60     for (ii=N; ii>=_i; ii--) { \
61       rp[ii+1] = rp[ii]; \
62       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
63     } \
64     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
65     rp[_i]                      = bcol;  \
66     ap[bs2*_i + bs*cidx + ridx] = value;  \
67     A->nonzerostate++;\
68 a_noinsert:; \
69     ailen[brow] = nrow; \
70   }
71 
72 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
73   { \
74     brow = row/bs;  \
75     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
76     rmax = bimax[brow]; nrow = bilen[brow]; \
77     bcol = col/bs; \
78     ridx = row % bs; cidx = col % bs; \
79     low  = 0; high = nrow; \
80     while (high-low > 3) { \
81       t = (low+high)/2; \
82       if (rp[t] > bcol) high = t; \
83       else              low  = t; \
84     } \
85     for (_i=low; _i<high; _i++) { \
86       if (rp[_i] > bcol) break; \
87       if (rp[_i] == bcol) { \
88         bap = ap + bs2*_i + bs*cidx + ridx; \
89         if (addv == ADD_VALUES) *bap += value;  \
90         else                    *bap  = value;  \
91         goto b_noinsert; \
92       } \
93     } \
94     if (b->nonew == 1) goto b_noinsert; \
95     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
96     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
97     N = nrow++ - 1;  \
98     /* shift up all the later entries in this row */ \
99     for (ii=N; ii>=_i; ii--) { \
100       rp[ii+1] = rp[ii]; \
101       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
102     } \
103     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
104     rp[_i]                      = bcol;  \
105     ap[bs2*_i + bs*cidx + ridx] = value;  \
106     B->nonzerostate++;\
107 b_noinsert:; \
108     bilen[brow] = nrow; \
109   }
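/*
   Both macros above locate the target block column with the same bounded
   bisection: binary search until the window has at most three entries, then
   a short linear scan.  A standalone sketch of that pattern (hypothetical
   helper, not used in this file):
*/
PETSC_STATIC_INLINE PetscInt MatSBAIJFindBlockCol_Sketch(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 3) {         /* bisect until the window is small */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {     /* finish with a linear scan */
    if (rp[i] > bcol) break;     /* passed the insertion point */
    if (rp[i] == bcol) return i; /* existing block found */
  }
  return -1;                     /* not present; the macros insert here */
}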
110 
111 /* Only add/insert a(i,j) with i<=j (blocks).
112    Any a(i,j) with i>j input by the user is ignored.
113 */
114 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
115 {
116   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
117   MatScalar      value;
118   PetscBool      roworiented = baij->roworiented;
119   PetscErrorCode ierr;
120   PetscInt       i,j,row,col;
121   PetscInt       rstart_orig=mat->rmap->rstart;
122   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
123   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
124 
125   /* Some variables required by the macros */
126   Mat          A     = baij->A;
127   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
128   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
129   MatScalar    *aa   =a->a;
130 
131   Mat         B     = baij->B;
132   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
133   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
134   MatScalar   *ba   =b->a;
135 
136   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
137   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
138   MatScalar *ap,*bap;
139 
140   /* for stash */
141   PetscInt  n_loc, *in_loc = NULL;
142   MatScalar *v_loc = NULL;
143 
144   PetscFunctionBegin;
145   if (!baij->donotstash) {
146     if (n > baij->n_loc) {
147       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
148       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
149       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
150       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
151 
152       baij->n_loc = n;
153     }
154     in_loc = baij->in_loc;
155     v_loc  = baij->v_loc;
156   }
157 
158   for (i=0; i<m; i++) {
159     if (im[i] < 0) continue;
160 #if defined(PETSC_USE_DEBUG)
161     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
162 #endif
163     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* row owned by this process */
164       row = im[i] - rstart_orig;              /* local row index */
165       for (j=0; j<n; j++) {
166         if (im[i]/bs > in[j]/bs) {
167           if (a->ignore_ltriangular) {
168             continue;    /* ignore lower triangular blocks */
169           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
170         }
171         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
172           col  = in[j] - cstart_orig;         /* local col index */
173           brow = row/bs; bcol = col/bs;
174           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
175           if (roworiented) value = v[i*n+j];
176           else             value = v[i+j*m];
177           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
178           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
179         } else if (in[j] < 0) continue;
180 #if defined(PETSC_USE_DEBUG)
181         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
182 #endif
183         else {  /* off-diag entry (B) */
184           if (mat->was_assembled) {
185             if (!baij->colmap) {
186               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
187             }
188 #if defined(PETSC_USE_CTABLE)
189             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
190             col  = col - 1;
191 #else
192             col = baij->colmap[in[j]/bs] - 1;
193 #endif
194             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
195               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
196               col  =  in[j];
197               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
198               B    = baij->B;
199               b    = (Mat_SeqBAIJ*)(B)->data;
200               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
201               ba   = b->a;
202             } else col += in[j]%bs;
203           } else col = in[j];
204           if (roworiented) value = v[i*n+j];
205           else             value = v[i+j*m];
206           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
207           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
208         }
209       }
210     } else {  /* off-process entry */
211       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
212       if (!baij->donotstash) {
213         mat->assembled = PETSC_FALSE;
214         n_loc          = 0;
215         for (j=0; j<n; j++) {
216           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
217           in_loc[n_loc] = in[j];
218           if (roworiented) {
219             v_loc[n_loc] = v[i*n+j];
220           } else {
221             v_loc[n_loc] = v[j*m+i];
222           }
223           n_loc++;
224         }
225         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
226       }
227     }
228   }
229   PetscFunctionReturn(0);
230 }
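
/*
   A caller-side sketch of the triangular filtering described above
   (hypothetical routine, not part of PETSc): with MAT_IGNORE_LOWER_TRIANGULAR
   set, lower-triangular input is dropped silently instead of raising an error.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesSBAIJ_UsageSketch(Mat mat,PetscInt i,PetscInt j,PetscScalar v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr); /* kept if block(i) <= block(j), otherwise ignored */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}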
231 
232 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
233 {
234   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
235   PetscErrorCode    ierr;
236   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
237   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
238   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
239   PetscBool         roworiented=a->roworiented;
240   const PetscScalar *value     = v;
241   MatScalar         *ap,*aa = a->a,*bap;
242 
243   PetscFunctionBegin;
244   if (col < row) {
245     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
246     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
247   }
248   rp   = aj + ai[row];
249   ap   = aa + bs2*ai[row];
250   rmax = imax[row];
251   nrow = ailen[row];
252   value = v;
253   low   = 0;
254   high  = nrow;
255 
256   while (high-low > 7) {
257     t = (low+high)/2;
258     if (rp[t] > col) high = t;
259     else             low  = t;
260   }
261   for (i=low; i<high; i++) {
262     if (rp[i] > col) break;
263     if (rp[i] == col) {
264       bap = ap +  bs2*i;
265       if (roworiented) {
266         if (is == ADD_VALUES) {
267           for (ii=0; ii<bs; ii++) {
268             for (jj=ii; jj<bs2; jj+=bs) {
269               bap[jj] += *value++;
270             }
271           }
272         } else {
273           for (ii=0; ii<bs; ii++) {
274             for (jj=ii; jj<bs2; jj+=bs) {
275               bap[jj] = *value++;
276             }
277           }
278         }
279       } else {
280         if (is == ADD_VALUES) {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=0; jj<bs; jj++) {
283               *bap++ += *value++;
284             }
285           }
286         } else {
287           for (ii=0; ii<bs; ii++) {
288             for (jj=0; jj<bs; jj++) {
289               *bap++  = *value++;
290             }
291           }
292         }
293       }
294       goto noinsert2;
295     }
296   }
297   if (nonew == 1) goto noinsert2;
298   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block index (%D, %D) in the matrix", orow, ocol);
299   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
300   N = nrow++ - 1; high++;
301   /* shift up all the later entries in this row */
302   for (ii=N; ii>=i; ii--) {
303     rp[ii+1] = rp[ii];
304     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
305   }
306   if (N >= i) {
307     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
308   }
309   rp[i] = col;
310   bap   = ap +  bs2*i;
311   if (roworiented) {
312     for (ii=0; ii<bs; ii++) {
313       for (jj=ii; jj<bs2; jj+=bs) {
314         bap[jj] = *value++;
315       }
316     }
317   } else {
318     for (ii=0; ii<bs; ii++) {
319       for (jj=0; jj<bs; jj++) {
320         *bap++ = *value++;
321       }
322     }
323   }
324   noinsert2:;
325   ailen[row] = nrow;
326   PetscFunctionReturn(0);
327 }
328 
329 /*
330    This routine is exactly duplicated in mpibaij.c
331 */
332 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
333 {
334   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
335   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
336   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
337   PetscErrorCode    ierr;
338   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
339   PetscBool         roworiented=a->roworiented;
340   const PetscScalar *value     = v;
341   MatScalar         *ap,*aa = a->a,*bap;
342 
343   PetscFunctionBegin;
344   rp   = aj + ai[row];
345   ap   = aa + bs2*ai[row];
346   rmax = imax[row];
347   nrow = ailen[row];
348   low  = 0;
349   high = nrow;
350   value = v;
351   while (high-low > 7) {
352     t = (low+high)/2;
353     if (rp[t] > col) high = t;
354     else             low  = t;
355   }
356   for (i=low; i<high; i++) {
357     if (rp[i] > col) break;
358     if (rp[i] == col) {
359       bap = ap +  bs2*i;
360       if (roworiented) {
361         if (is == ADD_VALUES) {
362           for (ii=0; ii<bs; ii++) {
363             for (jj=ii; jj<bs2; jj+=bs) {
364               bap[jj] += *value++;
365             }
366           }
367         } else {
368           for (ii=0; ii<bs; ii++) {
369             for (jj=ii; jj<bs2; jj+=bs) {
370               bap[jj] = *value++;
371             }
372           }
373         }
374       } else {
375         if (is == ADD_VALUES) {
376           for (ii=0; ii<bs; ii++,value+=bs) {
377             for (jj=0; jj<bs; jj++) {
378               bap[jj] += value[jj];
379             }
380             bap += bs;
381           }
382         } else {
383           for (ii=0; ii<bs; ii++,value+=bs) {
384             for (jj=0; jj<bs; jj++) {
385               bap[jj]  = value[jj];
386             }
387             bap += bs;
388           }
389         }
390       }
391       goto noinsert2;
392     }
393   }
394   if (nonew == 1) goto noinsert2;
395   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block index (%D, %D) in the matrix", orow, ocol);
396   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
397   N = nrow++ - 1; high++;
398   /* shift up all the later entries in this row */
399   for (ii=N; ii>=i; ii--) {
400     rp[ii+1] = rp[ii];
401     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
402   }
403   if (N >= i) {
404     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
405   }
406   rp[i] = col;
407   bap   = ap +  bs2*i;
408   if (roworiented) {
409     for (ii=0; ii<bs; ii++) {
410       for (jj=ii; jj<bs2; jj+=bs) {
411         bap[jj] = *value++;
412       }
413     }
414   } else {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=0; jj<bs; jj++) {
417         *bap++ = *value++;
418       }
419     }
420   }
421   noinsert2:;
422   ailen[row] = nrow;
423   PetscFunctionReturn(0);
424 }
425 
426 /*
427     This routine could be optimized by removing the need for the block copy below and passing stride information
428   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
429 */
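/*
   The block copy below gathers one bs x bs block of the caller's array v
   into the contiguous buffer barray.  In the row-oriented layout, stepval =
   (n-1)*bs, block (i,j) starts at v + i*(stepval+bs)*bs + j*bs, and each of
   its bs rows lies stepval+bs scalars further on; e.g. with bs = 2 and
   n = 2, block (0,1) occupies v[2], v[3], v[6], v[7].
*/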
430 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
431 {
432   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
433   const MatScalar *value;
434   MatScalar       *barray     =baij->barray;
435   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
436   PetscErrorCode  ierr;
437   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
438   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
439   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
440 
441   PetscFunctionBegin;
442   if (!barray) {
443     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
444     baij->barray = barray;
445   }
446 
447   if (roworiented) {
448     stepval = (n-1)*bs;
449   } else {
450     stepval = (m-1)*bs;
451   }
452   for (i=0; i<m; i++) {
453     if (im[i] < 0) continue;
454 #if defined(PETSC_USE_DEBUG)
455     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
456 #endif
457     if (im[i] >= rstart && im[i] < rend) {
458       row = im[i] - rstart;
459       for (j=0; j<n; j++) {
460         if (im[i] > in[j]) {
461           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
462           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
463         }
464         /* If there is only one block column (row oriented) or one block row (column oriented), a copy is not required */
465         if ((roworiented) && (n == 1)) {
466           barray = (MatScalar*) v + i*bs2;
467         } else if ((!roworiented) && (m == 1)) {
468           barray = (MatScalar*) v + j*bs2;
469         } else { /* Here a copy is required */
470           if (roworiented) {
471             value = v + i*(stepval+bs)*bs + j*bs;
472           } else {
473             value = v + j*(stepval+bs)*bs + i*bs;
474           }
475           for (ii=0; ii<bs; ii++,value+=stepval) {
476             for (jj=0; jj<bs; jj++) {
477               *barray++ = *value++;
478             }
479           }
480           barray -=bs2;
481         }
482 
483         if (in[j] >= cstart && in[j] < cend) {
484           col  = in[j] - cstart;
485           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
486         } else if (in[j] < 0) continue;
487 #if defined(PETSC_USE_DEBUG)
488         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
489 #endif
490         else {
491           if (mat->was_assembled) {
492             if (!baij->colmap) {
493               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
494             }
495 
496 #if defined(PETSC_USE_DEBUG)
497 #if defined(PETSC_USE_CTABLE)
498             { PetscInt data;
499               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
500               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
501             }
502 #else
503             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
504 #endif
505 #endif
506 #if defined(PETSC_USE_CTABLE)
507             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
508             col  = (col - 1)/bs;
509 #else
510             col = (baij->colmap[in[j]] - 1)/bs;
511 #endif
512             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
513               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
514               col  = in[j];
515             }
516           } else col = in[j];
517           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
518         }
519       }
520     } else {
521       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
522       if (!baij->donotstash) {
523         if (roworiented) {
524           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
525         } else {
526           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
527         }
528       }
529     }
530   }
531   PetscFunctionReturn(0);
532 }
533 
534 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
535 {
536   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
537   PetscErrorCode ierr;
538   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
539   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
540 
541   PetscFunctionBegin;
542   for (i=0; i<m; i++) {
543     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
544     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
545     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
546       row = idxm[i] - bsrstart;
547       for (j=0; j<n; j++) {
548         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
549         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
550         if (idxn[j] >= bscstart && idxn[j] < bscend) {
551           col  = idxn[j] - bscstart;
552           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
553         } else {
554           if (!baij->colmap) {
555             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
556           }
557 #if defined(PETSC_USE_CTABLE)
558           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
559           data--;
560 #else
561           data = baij->colmap[idxn[j]/bs]-1;
562 #endif
563           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
564           else {
565             col  = data + idxn[j]%bs;
566             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567           }
568         }
569       }
570     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
571   }
572   PetscFunctionReturn(0);
573 }
574 
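/*
   The norms below account for the symmetric storage, in which only the upper
   triangle is kept: with D_p the diagonal-block part (baij->A) and U_p the
   strictly off-diagonal part (baij->B) on process p,

       ||mat||_F^2 = sum_p ( ||D_p||_F^2 + 2 ||U_p||_F^2 )

   since every stored off-diagonal entry also represents its transpose, and
   because mat = mat^T,

       ||mat||_1 = ||mat||_inf = max_j sum_i |a_ij|,

   accumulated by adding each stored entry to both its row and column sums.
*/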
575 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
576 {
577   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
578   PetscErrorCode ierr;
579   PetscReal      sum[2],*lnorm2;
580 
581   PetscFunctionBegin;
582   if (baij->size == 1) {
583     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
584   } else {
585     if (type == NORM_FROBENIUS) {
586       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
587       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
588       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
589       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
590       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
591       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
592       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
593       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
594     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
595       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
596       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
597       PetscReal    *rsum,*rsum2,vabs;
598       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
599       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
600       MatScalar    *v;
601 
602       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
603       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
604       /* Amat */
605       v = amat->a; jj = amat->j;
606       for (brow=0; brow<mbs; brow++) {
607         grow = bs*(rstart + brow);
608         nz   = amat->i[brow+1] - amat->i[brow];
609         for (bcol=0; bcol<nz; bcol++) {
610           gcol = bs*(rstart + *jj); jj++;
611           for (col=0; col<bs; col++) {
612             for (row=0; row<bs; row++) {
613               vabs            = PetscAbsScalar(*v); v++;
614               rsum[gcol+col] += vabs;
615               /* non-diagonal block */
616               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
617             }
618           }
619         }
620         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
621       }
622       /* Bmat */
623       v = bmat->a; jj = bmat->j;
624       for (brow=0; brow<mbs; brow++) {
625         grow = bs*(rstart + brow);
626         nz = bmat->i[brow+1] - bmat->i[brow];
627         for (bcol=0; bcol<nz; bcol++) {
628           gcol = bs*garray[*jj]; jj++;
629           for (col=0; col<bs; col++) {
630             for (row=0; row<bs; row++) {
631               vabs            = PetscAbsScalar(*v); v++;
632               rsum[gcol+col] += vabs;
633               rsum[grow+row] += vabs;
634             }
635           }
636         }
637         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
638       }
639       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640       *norm = 0.0;
641       for (col=0; col<mat->cmap->N; col++) {
642         if (rsum2[col] > *norm) *norm = rsum2[col];
643       }
644       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
645     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
646   }
647   PetscFunctionReturn(0);
648 }
649 
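/*
   Assembly is two-phase: MatAssemblyBegin() launches communication of the
   stashed off-process entries (both the scalar stash and the block bstash),
   while MatAssemblyEnd() receives them, applies them through the local
   MatSetValues kernels, and then assembles the A and B pieces.
*/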
650 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
651 {
652   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
653   PetscErrorCode ierr;
654   PetscInt       nstash,reallocs;
655 
656   PetscFunctionBegin;
657   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
658 
659   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
664   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
667 
668 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
671   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
672   PetscErrorCode ierr;
673   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
674   PetscInt       *row,*col;
675   PetscBool      other_disassembled;
676   PetscMPIInt    n;
677   PetscBool      r1,r2,r3;
678   MatScalar      *val;
679 
680   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
681   PetscFunctionBegin;
682   if (!baij->donotstash &&  !mat->nooffprocentries) {
683     while (1) {
684       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
685       if (!flg) break;
686 
687       for (i=0; i<n;) {
688         /* Now identify the consecutive vals belonging to the same row */
689         for (j=i,rstart=row[j]; j<n; j++) {
690           if (row[j] != rstart) break;
691         }
692         if (j < n) ncols = j-i;
693         else       ncols = n-i;
694         /* Now assemble all these values with a single function call */
695         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
696         i    = j;
697       }
698     }
699     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
700     /* Now process the block-stash. Since the stashed values are column-oriented,
701        temporarily clear the roworiented flags, and restore the original flags
702        after the MatSetValuesBlocked() calls */
703     r1 = baij->roworiented;
704     r2 = a->roworiented;
705     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
706 
707     baij->roworiented = PETSC_FALSE;
708     a->roworiented    = PETSC_FALSE;
709 
710     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
711     while (1) {
712       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
713       if (!flg) break;
714 
715       for (i=0; i<n;) {
716         /* Now identify the consecutive vals belonging to the same row */
717         for (j=i,rstart=row[j]; j<n; j++) {
718           if (row[j] != rstart) break;
719         }
720         if (j < n) ncols = j-i;
721         else       ncols = n-i;
722         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
723         i    = j;
724       }
725     }
726     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
727 
728     baij->roworiented = r1;
729     a->roworiented    = r2;
730 
731     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
732   }
733 
734   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
735   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
736 
737   /* determine if any processor has disassembled; if so, we must
738      also disassemble ourselves so that we may reassemble. */
739   /*
740      if the nonzero structure of submatrix B cannot change then we know that
741      no processor disassembled, so we can skip this step
742   */
743   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
744     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
745     if (mat->was_assembled && !other_disassembled) {
746       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
747     }
748   }
749 
750   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
751     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
752   }
753   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
754   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
755 
756   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
757 
758   baij->rowvalues = 0;
759 
760   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
762     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
769 #include <petscdraw.h>
770 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
771 {
772   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
773   PetscErrorCode    ierr;
774   PetscInt          bs   = mat->rmap->bs;
775   PetscMPIInt       rank = baij->rank;
776   PetscBool         iascii,isdraw;
777   PetscViewer       sviewer;
778   PetscViewerFormat format;
779 
780   PetscFunctionBegin;
781   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
782   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
783   if (iascii) {
784     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
785     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
786       MatInfo info;
787       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
788       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
789       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
790       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
791       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
792       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
793       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
794       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
795       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
796       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
797       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
798       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
799       PetscFunctionReturn(0);
800     } else if (format == PETSC_VIEWER_ASCII_INFO) {
801       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
802       PetscFunctionReturn(0);
803     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
804       PetscFunctionReturn(0);
805     }
806   }
807 
808   if (isdraw) {
809     PetscDraw draw;
810     PetscBool isnull;
811     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
812     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
813     if (isnull) PetscFunctionReturn(0);
814   }
815 
816   {
817     /* assemble the entire matrix onto first processor. */
818     Mat          A;
819     Mat_SeqSBAIJ *Aloc;
820     Mat_SeqBAIJ  *Bloc;
821     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
822     MatScalar    *a;
823     const char   *matname;
824 
825     /* Should this be the same type as mat? */
826     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
827     if (!rank) {
828       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
829     } else {
830       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
831     }
832     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
833     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
834     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
835     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
836 
837     /* copy over the A part */
838     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
839     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
840     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
841 
842     for (i=0; i<mbs; i++) {
843       rvals[0] = bs*(baij->rstartbs + i);
844       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
845       for (j=ai[i]; j<ai[i+1]; j++) {
846         col = (baij->cstartbs+aj[j])*bs;
847         for (k=0; k<bs; k++) {
848           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
849           col++;
850           a += bs;
851         }
852       }
853     }
854     /* copy over the B part */
855     Bloc = (Mat_SeqBAIJ*)baij->B->data;
856     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
857     for (i=0; i<mbs; i++) {
858 
859       rvals[0] = bs*(baij->rstartbs + i);
860       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
861       for (j=ai[i]; j<ai[i+1]; j++) {
862         col = baij->garray[aj[j]]*bs;
863         for (k=0; k<bs; k++) {
864           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
865           col++;
866           a += bs;
867         }
868       }
869     }
870     ierr = PetscFree(rvals);CHKERRQ(ierr);
871     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
872     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
873     /*
874        Everyone has to call to draw the matrix since the graphics waits are
875        synchronized across all processors that share the PetscDraw object
876     */
877     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
878     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
879     if (!rank) {
880       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
881       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
882     }
883     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
884     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
885     ierr = MatDestroy(&A);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
889 
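/*
   The binary viewer below emits the standard PETSc binary matrix format,
   gathered onto process 0 under viewer flow control:

     header          4 PetscInt:   MAT_FILE_CLASSID, M, N, total nonzeros
     row lengths     M PetscInt    (per point row, expanded from block rows)
     column indices  nz PetscInt   (global, sorted within each row)
     values          nz PetscScalar

   plus a -matload_block_size hint written to the optional info file.
*/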
890 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
891 {
892   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
893   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
894   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
895   PetscErrorCode ierr;
896   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
897   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
898   int            fd;
899   PetscScalar    *column_values;
900   FILE           *file;
901   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
902   PetscInt       message_count,flowcontrolcount;
903 
904   PetscFunctionBegin;
905   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
906   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
907   nz   = bs2*(A->nz + B->nz);
908   rlen = mat->rmap->n;
909   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
910   if (!rank) {
911     header[0] = MAT_FILE_CLASSID;
912     header[1] = mat->rmap->N;
913     header[2] = mat->cmap->N;
914 
915     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
916     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
917     /* get largest number of rows any processor has */
918     range = mat->rmap->range;
919     for (i=1; i<size; i++) {
920       rlen = PetscMax(rlen,range[i+1] - range[i]);
921     }
922   } else {
923     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
924   }
925 
926   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
927   /* compute the number of nonzero blocks in each block row */
928   for (i=0; i<a->mbs; i++) {
929     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
930   }
931   /* store the row lengths to the file */
932   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
933   if (!rank) {
934     MPI_Status status;
935     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
936     rlen = (range[1] - range[0])/bs;
937     for (i=0; i<rlen; i++) {
938       for (j=0; j<bs; j++) {
939         row_lens[i*bs+j] = bs*crow_lens[i];
940       }
941     }
942     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
943     for (i=1; i<size; i++) {
944       rlen = (range[i+1] - range[i])/bs;
945       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
946       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
947       for (k=0; k<rlen; k++) {
948         for (j=0; j<bs; j++) {
949           row_lens[k*bs+j] = bs*crow_lens[k];
950         }
951       }
952       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
953     }
954     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
955     ierr = PetscFree(row_lens);CHKERRQ(ierr);
956   } else {
957     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
958     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
959     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
960   }
961   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
962 
963   /* load up the local column indices. Include them for every row, not just once per block row, since process 0 does not have
964      the information needed to expand a block row into its individual rows. This requires more communication, but still no more
965      than the communication needed for the nonzero values */
966   nzmax = nz; /* space the largest processor needs */
967   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
968   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
969   cnt   = 0;
970   for (i=0; i<a->mbs; i++) {
971     pcnt = cnt;
972     for (j=B->i[i]; j<B->i[i+1]; j++) {
973       if ((col = garray[B->j[j]]) > cstart) break;
974       for (l=0; l<bs; l++) {
975         column_indices[cnt++] = bs*col+l;
976       }
977     }
978     for (k=A->i[i]; k<A->i[i+1]; k++) {
979       for (l=0; l<bs; l++) {
980         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
981       }
982     }
983     for (; j<B->i[i+1]; j++) {
984       for (l=0; l<bs; l++) {
985         column_indices[cnt++] = bs*garray[B->j[j]]+l;
986       }
987     }
988     len = cnt - pcnt;
989     for (k=1; k<bs; k++) {
990       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
991       cnt += len;
992     }
993   }
994   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
995 
996   /* store the columns to the file */
997   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
998   if (!rank) {
999     MPI_Status status;
1000     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1001     for (i=1; i<size; i++) {
1002       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1003       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1004       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1005       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1006     }
1007     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1008   } else {
1009     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1010     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1011     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1012     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the numerical values */
1017   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<a->mbs; i++) {
1020     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1021     for (j=B->i[i]; j<B->i[i+1]; j++) {
1022       if (garray[B->j[j]] > cstart) break;
1023       for (l=0; l<bs; l++) {
1024         for (ll=0; ll<bs; ll++) {
1025           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1026         }
1027       }
1028       cnt += bs;
1029     }
1030     for (k=A->i[i]; k<A->i[i+1]; k++) {
1031       for (l=0; l<bs; l++) {
1032         for (ll=0; ll<bs; ll++) {
1033           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1034         }
1035       }
1036       cnt += bs;
1037     }
1038     for (; j<B->i[i+1]; j++) {
1039       for (l=0; l<bs; l++) {
1040         for (ll=0; ll<bs; ll++) {
1041           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1042         }
1043       }
1044       cnt += bs;
1045     }
1046     cnt += (bs-1)*rlen;
1047   }
1048   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1049 
1050   /* store the column values to the file */
1051   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1052   if (!rank) {
1053     MPI_Status status;
1054     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1055     for (i=1; i<size; i++) {
1056       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1057       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1058       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1059       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1060     }
1061     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1062   } else {
1063     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1064     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1065     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1066     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1067   }
1068   ierr = PetscFree(column_values);CHKERRQ(ierr);
1069 
1070   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1071   if (file) {
1072     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1073   }
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1078 {
1079   PetscErrorCode ierr;
1080   PetscBool      iascii,isdraw,issocket,isbinary;
1081 
1082   PetscFunctionBegin;
1083   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1084   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1085   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1086   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1087   if (iascii || isdraw || issocket) {
1088     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1089   } else if (isbinary) {
1090     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1091   }
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1096 {
1097   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101 #if defined(PETSC_USE_LOG)
1102   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1103 #endif
1104   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1105   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1106   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1107   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1108 #if defined(PETSC_USE_CTABLE)
1109   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1110 #else
1111   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1112 #endif
1113   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1114   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1115   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1116   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1117   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1118   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1119   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1120   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1121   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1122   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1123   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1124   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1125   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1126   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1127   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1128 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1129   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1130 #endif
1131   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1132   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1133   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1134   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1135 
1136   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
1141 #if defined(PETSC_HAVE_ELEMENTAL)
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1143 #endif
1144   PetscFunctionReturn(0);
1145 }
1146 
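/*
   The two MatMult variants below need only one scatter per product: slvec0
   is packed as [x ; B^T x] (B^H x in the Hermitian case) and slvec1 starts
   as [A x ; 0].  A single ADD_VALUES forward scatter over sMvctx then both
   delivers the ghost values of x into slvec1b and accumulates the remote
   B^T x (lower-triangular) contributions into slvec1a, so the product
   finishes as y = B*slvec1b + slvec1a.
*/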
1147 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1150   PetscErrorCode    ierr;
1151   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1152   PetscScalar       *from;
1153   const PetscScalar *x;
1154 
1155   PetscFunctionBegin;
1156   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1157   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1158 
1159   /* diagonal part */
1160   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1161   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1162 
1163   /* subdiagonal part */
1164   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1165 
1166   /* copy x into the vec slvec0 */
1167   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1168   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1169 
1170   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1171   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1172   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1173 
1174   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1175   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1176   /* superdiagonal part */
1177   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1182 {
1183   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1184   PetscErrorCode    ierr;
1185   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1186   PetscScalar       *from;
1187   const PetscScalar *x;
1188 
1189   PetscFunctionBegin;
1190   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1191   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1192 
1193   /* diagonal part */
1194   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1195   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1196 
1197   /* subdiagonal part */
1198   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1199 
1200   /* copy x into the vec slvec0 */
1201   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1202   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1203 
1204   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1205   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1206   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1207 
1208   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1209   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1210   /* superdiagonal part */
1211   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 
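/*
   The _2comm variant below instead uses the conventional two scatters: a
   forward scatter of x for the superdiagonal product with B, and a reverse
   ADD_VALUES scatter of B^T x for the subdiagonal contributions.
*/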
1215 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1216 {
1217   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1218   PetscErrorCode ierr;
1219   PetscInt       nt;
1220 
1221   PetscFunctionBegin;
1222   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1223   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1224 
1225   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1226   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1227 
1228   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229   /* do diagonal part */
1230   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1231   /* do superdiagonal part */
1232   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1233   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1234   /* do subdiagonal part */
1235   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1236   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1242 {
1243   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1244   PetscErrorCode    ierr;
1245   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1246   PetscScalar       *from,zero=0.0;
1247   const PetscScalar *x;
1248 
1249   PetscFunctionBegin;
1250   /*
1251   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1252   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1253   */
1254   /* diagonal part */
1255   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1256   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1257 
1258   /* subdiagonal part */
1259   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1260 
1261   /* copy x into the vec slvec0 */
1262   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1263   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1264   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1265   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1266 
1267   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1269   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270 
1271   /* superdiagonal part */
1272   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1277 {
1278   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1279   PetscErrorCode ierr;
1280 
1281   PetscFunctionBegin;
1282   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283   /* do diagonal part */
1284   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1285   /* do superdiagonal part */
1286   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1287   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1288 
1289   /* do subdiagonal part */
1290   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1291   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1292   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 /*
1297   This only works correctly for square matrices where the subblock A->A is the
1298    diagonal block
1299 */
1300 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1301 {
1302   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1303   PetscErrorCode ierr;
1304 
1305   PetscFunctionBegin;
1306   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1307   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1318   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1323 {
1324   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1325   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1326   PetscErrorCode ierr;
1327   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1328   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1329   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1330 
1331   PetscFunctionBegin;
1332   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1333   mat->getrowactive = PETSC_TRUE;
1334 
1335   if (!mat->rowvalues && (idx || v)) {
1336     /*
1337         allocate enough space to hold information from the longest row.
1338     */
1339     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1340     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1341     PetscInt     max = 1,mbs = mat->mbs,tmp;
1342     for (i=0; i<mbs; i++) {
1343       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1344       if (max < tmp) max = tmp;
1345     }
1346     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1347   }
1348 
1349   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1350   lrow = row - brstart;  /* local row index */
1351 
1352   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1353   if (!v)   {pvA = 0; pvB = 0;}
1354   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1355   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1356   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1357   nztot = nzA + nzB;
1358 
1359   cmap = mat->garray;
1360   if (v  || idx) {
1361     if (nztot) {
1362       /* Sort by increasing column numbers, assuming A and B already sorted */
1363       PetscInt imark = -1;
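      /* imark counts the B (off-diagonal) entries whose global block column lies before the diagonal block */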
1364       if (v) {
1365         *v = v_p = mat->rowvalues;
1366         for (i=0; i<nzB; i++) {
1367           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1368           else break;
1369         }
1370         imark = i;
1371         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1372         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1373       }
1374       if (idx) {
1375         *idx = idx_p = mat->rowindices;
1376         if (imark > -1) {
1377           for (i=0; i<imark; i++) {
1378             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1379           }
1380         } else {
1381           for (i=0; i<nzB; i++) {
1382             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1383             else break;
1384           }
1385           imark = i;
1386         }
1387         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1388         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1389       }
1390     } else {
1391       if (idx) *idx = 0;
1392       if (v)   *v   = 0;
1393     }
1394   }
1395   *nz  = nztot;
1396   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1397   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1402 {
1403   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1404 
1405   PetscFunctionBegin;
1406   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1407   baij->getrowactive = PETSC_FALSE;
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1414   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1415 
1416   PetscFunctionBegin;
1417   aA->getrow_utriangular = PETSC_TRUE;
1418   PetscFunctionReturn(0);
1419 }
1420 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1421 {
1422   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1423   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1424 
1425   PetscFunctionBegin;
1426   aA->getrow_utriangular = PETSC_FALSE;
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1431 {
1432   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1437   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1442 {
1443   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1444   PetscErrorCode ierr;
1445 
1446   PetscFunctionBegin;
1447   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1448   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1453    Input: isrow       - distributed (parallel),
1454           iscol_local - locally owned (sequential)
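           Example: isrow = {3,5} and iscol_local = {1,3,5,7} gives *flg = PETSC_TRUE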
1455 */
1456 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1457 {
1458   PetscErrorCode ierr;
1459   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1460   const PetscInt *ptr1,*ptr2;
1461 
1462   PetscFunctionBegin;
1463   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1464   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1465   if (sz1 > sz2) {
1466     *flg = PETSC_FALSE;
1467     PetscFunctionReturn(0);
1468   }
1469 
1470   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1471   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1472 
1473   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1474   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1475   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1476   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1477   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1478   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1479 
1480   nmatch=0;
1481   k     = 0;
1482   for (i=0; i<sz1; i++){
1483     for (j=k; j<sz2; j++){
1484       if (a1[i] == a2[j]) {
1485         k = j; nmatch++;
1486         break;
1487       }
1488     }
1489   }
1490   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1491   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1492   ierr = PetscFree(a1);CHKERRQ(ierr);
1493   ierr = PetscFree(a2);CHKERRQ(ierr);
1494   if (nmatch < sz1) {
1495     *flg = PETSC_FALSE;
1496   } else {
1497     *flg = PETSC_TRUE;
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1503 {
1504   PetscErrorCode ierr;
1505   IS             iscol_local;
1506   PetscInt       csize;
1507   PetscBool      isequal;
1508 
1509   PetscFunctionBegin;
1510   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1511   if (call == MAT_REUSE_MATRIX) {
1512     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1513     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1514   } else {
1515     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1516     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1517     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1518   }
1519 
1520   /* now call MatCreateSubMatrix_MPIBAIJ() */
1521   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1522   if (call == MAT_INITIAL_MATRIX) {
1523     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1524     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1525   }
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1530 {
1531   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1532   PetscErrorCode ierr;
1533 
1534   PetscFunctionBegin;
1535   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1536   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1541 {
1542   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1543   Mat            A  = a->A,B = a->B;
1544   PetscErrorCode ierr;
1545   PetscReal      isend[5],irecv[5];
1546 
1547   PetscFunctionBegin;
1548   info->block_size = (PetscReal)matin->rmap->bs;
1549 
1550   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1551 
1552   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1553   isend[3] = info->memory;  isend[4] = info->mallocs;
1554 
1555   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1556 
1557   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1558   isend[3] += info->memory;  isend[4] += info->mallocs;
1559   if (flag == MAT_LOCAL) {
1560     info->nz_used      = isend[0];
1561     info->nz_allocated = isend[1];
1562     info->nz_unneeded  = isend[2];
1563     info->memory       = isend[3];
1564     info->mallocs      = isend[4];
1565   } else if (flag == MAT_GLOBAL_MAX) {
1566     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1567 
1568     info->nz_used      = irecv[0];
1569     info->nz_allocated = irecv[1];
1570     info->nz_unneeded  = irecv[2];
1571     info->memory       = irecv[3];
1572     info->mallocs      = irecv[4];
1573   } else if (flag == MAT_GLOBAL_SUM) {
1574     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1575 
1576     info->nz_used      = irecv[0];
1577     info->nz_allocated = irecv[1];
1578     info->nz_unneeded  = irecv[2];
1579     info->memory       = irecv[3];
1580     info->mallocs      = irecv[4];
1581   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1582   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1583   info->fill_ratio_needed = 0;
1584   info->factor_mallocs    = 0;
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1589 {
1590   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1591   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1592   PetscErrorCode ierr;
1593 
1594   PetscFunctionBegin;
1595   switch (op) {
1596   case MAT_NEW_NONZERO_LOCATIONS:
1597   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1598   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1599   case MAT_KEEP_NONZERO_PATTERN:
1600   case MAT_SUBMAT_SINGLEIS:
1601   case MAT_NEW_NONZERO_LOCATION_ERR:
1602     MatCheckPreallocated(A,1);
1603     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1604     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1605     break;
1606   case MAT_ROW_ORIENTED:
1607     MatCheckPreallocated(A,1);
1608     a->roworiented = flg;
1609 
1610     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1611     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1612     break;
1613   case MAT_NEW_DIAGONALS:
1614     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1615     break;
1616   case MAT_IGNORE_OFF_PROC_ENTRIES:
1617     a->donotstash = flg;
1618     break;
1619   case MAT_USE_HASH_TABLE:
1620     a->ht_flag = flg;
1621     break;
1622   case MAT_HERMITIAN:
1623     MatCheckPreallocated(A,1);
1624     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1625     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1626 #if defined(PETSC_USE_COMPLEX)
1627     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1628 #endif
1629     break;
1630   case MAT_SPD:
1631     A->spd_set = PETSC_TRUE;
1632     A->spd     = flg;
1633     if (flg) {
1634       A->symmetric                  = PETSC_TRUE;
1635       A->structurally_symmetric     = PETSC_TRUE;
1636       A->symmetric_set              = PETSC_TRUE;
1637       A->structurally_symmetric_set = PETSC_TRUE;
1638     }
1639     break;
1640   case MAT_SYMMETRIC:
1641     MatCheckPreallocated(A,1);
1642     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1643     break;
1644   case MAT_STRUCTURALLY_SYMMETRIC:
1645     MatCheckPreallocated(A,1);
1646     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1647     break;
1648   case MAT_SYMMETRY_ETERNAL:
1649     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1650     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1651     break;
1652   case MAT_IGNORE_LOWER_TRIANGULAR:
1653     aA->ignore_ltriangular = flg;
1654     break;
1655   case MAT_ERROR_LOWER_TRIANGULAR:
1656     aA->ignore_ltriangular = flg;
1657     break;
1658   case MAT_GETROW_UPPERTRIANGULAR:
1659     aA->getrow_utriangular = flg;
1660     break;
1661   default:
1662     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1663   }
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1668 {
1669   PetscErrorCode ierr;
1670 
1671   PetscFunctionBegin;
1672   if (reuse == MAT_INITIAL_MATRIX) {
1673     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1674   }  else if (reuse == MAT_REUSE_MATRIX) {
1675     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1676   }
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1681 {
1682   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1683   Mat            a     = baij->A, b=baij->B;
1684   PetscErrorCode ierr;
1685   PetscInt       nv,m,n;
1686   PetscBool      flg;
1687 
1688   PetscFunctionBegin;
1689   if (ll != rr) {
1690     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1691     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1692   }
1693   if (!ll) PetscFunctionReturn(0);
1694 
1695   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1696   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1697 
1698   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1699   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1700 
1701   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1702 
1703   /* left diagonalscale the off-diagonal part */
1704   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1705 
1706   /* scale the diagonal part */
1707   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1708 
1709   /* right diagonalscale the off-diagonal part */
1710   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1711   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1716 {
1717   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1718   PetscErrorCode ierr;
1719 
1720   PetscFunctionBegin;
1721   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1726 
1727 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1728 {
1729   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1730   Mat            a,b,c,d;
1731   PetscBool      flg;
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   a = matA->A; b = matA->B;
1736   c = matB->A; d = matB->B;
1737 
1738   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1739   if (flg) {
1740     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1741   }
1742   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1747 {
1748   PetscErrorCode ierr;
1749   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1750   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1751 
1752   PetscFunctionBegin;
1753   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1754   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1755     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1756     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1757     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1758   } else {
1759     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1760     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1761   }
1762   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1767 {
1768   PetscErrorCode ierr;
1769 
1770   PetscFunctionBegin;
1771   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1776 {
1777   PetscErrorCode ierr;
1778   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1779   PetscBLASInt   bnz,one=1;
1780   Mat_SeqSBAIJ   *xa,*ya;
1781   Mat_SeqBAIJ    *xb,*yb;
1782 
1783   PetscFunctionBegin;
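  /* Three cases: with identical nonzero patterns the stored values admit a direct BLAS axpy;
     when X's pattern is a subset of Y's, MatAXPY_Basic() is reused; otherwise Y is rebuilt
     with merged preallocation before the add. */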
1784   if (str == SAME_NONZERO_PATTERN) {
1785     PetscScalar alpha = a;
1786     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1787     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1788     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1789     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1790     xb   = (Mat_SeqBAIJ*)xx->B->data;
1791     yb   = (Mat_SeqBAIJ*)yy->B->data;
1792     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1793     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1794     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1795   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1796     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1797     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1798     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1799   } else {
1800     Mat      B;
1801     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1802     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1803     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1804     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1805     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1806     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1807     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1808     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1809     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1810     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1811     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1812     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1813     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1814     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1815     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1816     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1817     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1818     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1819     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1820     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1821   }
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1826 {
1827   PetscErrorCode ierr;
1828   PetscInt       i;
1829   PetscBool      flg;
1830 
1831   PetscFunctionBegin;
1832   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1833   for (i=0; i<n; i++) {
1834     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1835     if (!flg) {
1836       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1837     }
1838   }
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1843 {
1844   PetscErrorCode ierr;
1845   Mat_MPISBAIJ   *maij = (Mat_MPISBAIJ*)Y->data;
1846   Mat_SeqSBAIJ   *aij  = (Mat_SeqSBAIJ*)maij->A->data;
1847 
1848   PetscFunctionBegin;
1849   if (!Y->preallocated) {
1850     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1851   } else if (!aij->nz) {
1852     PetscInt nonew = aij->nonew;
1853     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1854     aij->nonew = nonew;
1855   }
1856   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1861 {
1862   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1863   PetscErrorCode ierr;
1864 
1865   PetscFunctionBegin;
1866   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1867   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1868   if (d) {
1869     PetscInt rstart;
1870     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1871     *d += rstart/A->rmap->bs;
1872 
1873   }
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1878 {
1879   PetscFunctionBegin;
1880   *a = ((Mat_MPISBAIJ*)A->data)->A;
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 /* -------------------------------------------------------------------*/
1885 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1886                                        MatGetRow_MPISBAIJ,
1887                                        MatRestoreRow_MPISBAIJ,
1888                                        MatMult_MPISBAIJ,
1889                                /*  4*/ MatMultAdd_MPISBAIJ,
1890                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1891                                        MatMultAdd_MPISBAIJ,
1892                                        0,
1893                                        0,
1894                                        0,
1895                                /* 10*/ 0,
1896                                        0,
1897                                        0,
1898                                        MatSOR_MPISBAIJ,
1899                                        MatTranspose_MPISBAIJ,
1900                                /* 15*/ MatGetInfo_MPISBAIJ,
1901                                        MatEqual_MPISBAIJ,
1902                                        MatGetDiagonal_MPISBAIJ,
1903                                        MatDiagonalScale_MPISBAIJ,
1904                                        MatNorm_MPISBAIJ,
1905                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1906                                        MatAssemblyEnd_MPISBAIJ,
1907                                        MatSetOption_MPISBAIJ,
1908                                        MatZeroEntries_MPISBAIJ,
1909                                /* 24*/ 0,
1910                                        0,
1911                                        0,
1912                                        0,
1913                                        0,
1914                                /* 29*/ MatSetUp_MPISBAIJ,
1915                                        0,
1916                                        0,
1917                                        MatGetDiagonalBlock_MPISBAIJ,
1918                                        0,
1919                                /* 34*/ MatDuplicate_MPISBAIJ,
1920                                        0,
1921                                        0,
1922                                        0,
1923                                        0,
1924                                /* 39*/ MatAXPY_MPISBAIJ,
1925                                        MatCreateSubMatrices_MPISBAIJ,
1926                                        MatIncreaseOverlap_MPISBAIJ,
1927                                        MatGetValues_MPISBAIJ,
1928                                        MatCopy_MPISBAIJ,
1929                                /* 44*/ 0,
1930                                        MatScale_MPISBAIJ,
1931                                        MatShift_MPISBAIJ,
1932                                        0,
1933                                        0,
1934                                /* 49*/ 0,
1935                                        0,
1936                                        0,
1937                                        0,
1938                                        0,
1939                                /* 54*/ 0,
1940                                        0,
1941                                        MatSetUnfactored_MPISBAIJ,
1942                                        0,
1943                                        MatSetValuesBlocked_MPISBAIJ,
1944                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1945                                        0,
1946                                        0,
1947                                        0,
1948                                        0,
1949                                /* 64*/ 0,
1950                                        0,
1951                                        0,
1952                                        0,
1953                                        0,
1954                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1955                                        0,
1956                                        0,
1957                                        0,
1958                                        0,
1959                                /* 74*/ 0,
1960                                        0,
1961                                        0,
1962                                        0,
1963                                        0,
1964                                /* 79*/ 0,
1965                                        0,
1966                                        0,
1967                                        0,
1968                                        MatLoad_MPISBAIJ,
1969                                /* 84*/ 0,
1970                                        0,
1971                                        0,
1972                                        0,
1973                                        0,
1974                                /* 89*/ 0,
1975                                        0,
1976                                        0,
1977                                        0,
1978                                        0,
1979                                /* 94*/ 0,
1980                                        0,
1981                                        0,
1982                                        0,
1983                                        0,
1984                                /* 99*/ 0,
1985                                        0,
1986                                        0,
1987                                        0,
1988                                        0,
1989                                /*104*/ 0,
1990                                        MatRealPart_MPISBAIJ,
1991                                        MatImaginaryPart_MPISBAIJ,
1992                                        MatGetRowUpperTriangular_MPISBAIJ,
1993                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1994                                /*109*/ 0,
1995                                        0,
1996                                        0,
1997                                        0,
1998                                        MatMissingDiagonal_MPISBAIJ,
1999                                /*114*/ 0,
2000                                        0,
2001                                        0,
2002                                        0,
2003                                        0,
2004                                /*119*/ 0,
2005                                        0,
2006                                        0,
2007                                        0,
2008                                        0,
2009                                /*124*/ 0,
2010                                        0,
2011                                        0,
2012                                        0,
2013                                        0,
2014                                /*129*/ 0,
2015                                        0,
2016                                        0,
2017                                        0,
2018                                        0,
2019                                /*134*/ 0,
2020                                        0,
2021                                        0,
2022                                        0,
2023                                        0,
2024                                /*139*/ MatSetBlockSizes_Default,
2025                                        0,
2026                                        0,
2027                                        0,
2028                                        0,
2029                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2030 };
2031 
2032 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2033 {
2034   Mat_MPISBAIJ   *b;
2035   PetscErrorCode ierr;
2036   PetscInt       i,mbs,Mbs;
2037 
2038   PetscFunctionBegin;
2039   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2040   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2041   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2042   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2043 
2044   b   = (Mat_MPISBAIJ*)B->data;
2045   mbs = B->rmap->n/bs;
2046   Mbs = B->rmap->N/bs;
2047   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
2048 
2049   B->rmap->bs = bs;
2050   b->bs2      = bs*bs;
2051   b->mbs      = mbs;
2052   b->Mbs      = Mbs;
2053   b->nbs      = B->cmap->n/bs;
2054   b->Nbs      = B->cmap->N/bs;
2055 
2056   for (i=0; i<=b->size; i++) {
2057     b->rangebs[i] = B->rmap->range[i]/bs;
2058   }
2059   b->rstartbs = B->rmap->rstart/bs;
2060   b->rendbs   = B->rmap->rend/bs;
2061 
2062   b->cstartbs = B->cmap->rstart/bs;
2063   b->cendbs   = B->cmap->rend/bs;
2064 
2065 #if defined(PETSC_USE_CTABLE)
2066   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2067 #else
2068   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2069 #endif
2070   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2071   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2072   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2073   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2074   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2075   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2076   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2077   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2078   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2079 
2080   /* Because b->B may have been resized we simply destroy it and create a new one each time */
2081   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2082   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2083   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2084   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2085   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2086 
2087   if (!B->preallocated) {
2088     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2089     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2090     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2091     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2092     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2093   }
2094 
2095   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2096   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2097 
2098   B->preallocated  = PETSC_TRUE;
2099   B->was_assembled = PETSC_FALSE;
2100   B->assembled     = PETSC_FALSE;
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2105 {
2106   PetscInt       m,rstart,cstart,cend;
2107   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2108   const PetscInt *JJ    =0;
2109   PetscScalar    *values=0;
2110   PetscErrorCode ierr;
2111 
2112   PetscFunctionBegin;
2113   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2114   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2115   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2116   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2117   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2118   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2119   m      = B->rmap->n/bs;
2120   rstart = B->rmap->rstart/bs;
2121   cstart = B->cmap->rstart/bs;
2122   cend   = B->cmap->rend/bs;
2123 
2124   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2125   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
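  /* for each block row, count the block columns inside [cstart,cend) (diagonal part, d_nnz) and the rest (off-diagonal part, o_nnz) */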
2126   for (i=0; i<m; i++) {
2127     nz = ii[i+1] - ii[i];
2128     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2129     nz_max = PetscMax(nz_max,nz);
2130     JJ     = jj + ii[i];
2131     for (j=0; j<nz; j++) {
2132       if (*JJ >= cstart) break;
2133       JJ++;
2134     }
2135     d = 0;
2136     for (; j<nz; j++) {
2137       if (*JJ++ >= cend) break;
2138       d++;
2139     }
2140     d_nnz[i] = d;
2141     o_nnz[i] = nz - d;
2142   }
2143   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2144   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2145 
2146   values = (PetscScalar*)V;
2147   if (!values) {
2148     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2149     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2150   }
2151   for (i=0; i<m; i++) {
2152     PetscInt          row    = i + rstart;
2153     PetscInt          ncols  = ii[i+1] - ii[i];
2154     const PetscInt    *icols = jj + ii[i];
2155     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2156     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2157   }
2158 
2159   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2160   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2161   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2162   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 /*MC
2167    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2168    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2169    the matrix is stored.
2170 
2171   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2172  can call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE).
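
  A minimal creation sketch (an illustrative outline, not a complete program; M, N, bs, d_nz,
  and o_nz are placeholders to be chosen for the application):
.vb
  Mat A;
  MatCreate(comm,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
  MatSetType(A,MATMPISBAIJ);
  MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
  /* ... MatSetValuesBlocked(), MatAssemblyBegin(), MatAssemblyEnd() ... */
  MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);  /* must come after assembly */
.ve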
2173 
2174    Options Database Keys:
2175 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2176 
2177   Level: beginner
2178 
2179 .seealso: MatCreateMPISBAIJ
2180 M*/
2181 
2182 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2183 
2184 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2185 {
2186   Mat_MPISBAIJ   *b;
2187   PetscErrorCode ierr;
2188   PetscBool      flg = PETSC_FALSE;
2189 
2190   PetscFunctionBegin;
2191   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2192   B->data = (void*)b;
2193   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2194 
2195   B->ops->destroy = MatDestroy_MPISBAIJ;
2196   B->ops->view    = MatView_MPISBAIJ;
2197   B->assembled    = PETSC_FALSE;
2198   B->insertmode   = NOT_SET_VALUES;
2199 
2200   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2201   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2202 
2203   /* build local table of row and column ownerships */
2204   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2205 
2206   /* build cache for off array entries formed */
2207   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2208 
2209   b->donotstash  = PETSC_FALSE;
2210   b->colmap      = NULL;
2211   b->garray      = NULL;
2212   b->roworiented = PETSC_TRUE;
2213 
2214   /* stuff used in block assembly */
2215   b->barray = 0;
2216 
2217   /* stuff used for matrix vector multiply */
2218   b->lvec    = 0;
2219   b->Mvctx   = 0;
2220   b->slvec0  = 0;
2221   b->slvec0b = 0;
2222   b->slvec1  = 0;
2223   b->slvec1a = 0;
2224   b->slvec1b = 0;
2225   b->sMvctx  = 0;
2226 
2227   /* stuff for MatGetRow() */
2228   b->rowindices   = 0;
2229   b->rowvalues    = 0;
2230   b->getrowactive = PETSC_FALSE;
2231 
2232   /* hash table stuff */
2233   b->ht           = 0;
2234   b->hd           = 0;
2235   b->ht_size      = 0;
2236   b->ht_flag      = PETSC_FALSE;
2237   b->ht_fact      = 0;
2238   b->ht_total_ct  = 0;
2239   b->ht_insert_ct = 0;
2240 
2241   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2242   b->ijonly = PETSC_FALSE;
2243 
2244   b->in_loc = 0;
2245   b->v_loc  = 0;
2246   b->n_loc  = 0;
2247 
2248   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2250   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2251   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2252 #if defined(PETSC_HAVE_ELEMENTAL)
2253   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2254 #endif
2255 
2256   B->symmetric                  = PETSC_TRUE;
2257   B->structurally_symmetric     = PETSC_TRUE;
2258   B->symmetric_set              = PETSC_TRUE;
2259   B->structurally_symmetric_set = PETSC_TRUE;
2260 
2261   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2262   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2263   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2264   if (flg) {
2265     PetscReal fact = 1.39;
2266     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2267     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2268     if (fact <= 1.0) fact = 1.39;
2269     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2270     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2271   }
2272   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 /*MC
2277    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2278 
2279    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2280    and MATMPISBAIJ otherwise.
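
   An illustrative sketch (comm, m, n, M, N are placeholders) showing runtime type selection:
.vb
  Mat A;
  MatCreate(comm,&A);
  MatSetSizes(A,m,n,M,N);
  MatSetType(A,MATSBAIJ);   /* SEQSBAIJ on one process, MPISBAIJ otherwise */
  MatSetFromOptions(A);     /* may be overridden with -mat_type sbaij */
.ve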
2281 
2282    Options Database Keys:
2283 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2284 
2285   Level: beginner
2286 
2287 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2288 M*/
2289 
2290 /*@C
2291    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2292    the user should preallocate the matrix storage by setting the parameters
2293    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2294    performance can be increased by more than a factor of 50.
2295 
2296    Collective on Mat
2297 
2298    Input Parameters:
2299 +  B - the matrix
2300 .  bs   - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2301           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2302 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2303            submatrix  (same for all local rows)
2304 .  d_nnz - array containing the number of block nonzeros in the various block rows
2305            in the upper triangular and diagonal part of the diagonal portion of the local
2306            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must
2307            leave room for the diagonal entry and set a value even if it is zero.
2308 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2309            submatrix (same for all local rows).
2310 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2311            off-diagonal portion of the local submatrix that is right of the diagonal
2312            (possibly different for each block row) or NULL.
2313 
2314 
2315    Options Database Keys:
2316 +   -mat_no_unroll - uses code that does not unroll the loops in the
2317                      block calculations (much slower)
2318 -   -mat_block_size - size of the blocks to use
2319 
2320    Notes:
2321 
2322    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2323    then it must be used on all processors that share the object for that argument.
2324 
2325    If the *_nnz parameter is given then the *_nz parameter is ignored
2326 
2327    Storage Information:
2328    For a square global matrix we define each processor's diagonal portion
2329    to be its local rows and the corresponding columns (a square submatrix);
2330    each processor's off-diagonal portion encompasses the remainder of the
2331    local matrix (a rectangular submatrix).
2332 
2333    The user can specify preallocated storage for the diagonal part of
2334    the local submatrix with either d_nz or d_nnz (not both).  Set
2335    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2336    memory allocation.  Likewise, specify preallocated storage for the
2337    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2338 
2339    You can call MatGetInfo() to get information on how effective the preallocation was;
2340    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2341    You can also run with the option -info and look for messages with the string
2342    malloc in them to see if additional memory allocation was needed.
2343 
2344    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2345    the figure below we depict these three local rows and all columns (0-11).
2346 
2347 .vb
2348            0 1 2 3 4 5 6 7 8 9 10 11
2349           --------------------------
2350    row 3  |. . . d d d o o o o  o  o
2351    row 4  |. . . d d d o o o o  o  o
2352    row 5  |. . . d d d o o o o  o  o
2353           --------------------------
2354 .ve
2355 
2356    Thus, any entries in the d locations are stored in the d (diagonal)
2357    submatrix, and any entries in the o locations are stored in the
2358    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2359    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2360 
2361    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2362    plus the diagonal part of the d matrix,
2363    and o_nz should indicate the number of block nonzeros per row in the o matrix
2364 
2365    In general, for PDE problems in which most nonzeros are near the diagonal,
2366    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2367    or you will get TERRIBLE performance; see the users' manual chapter on
2368    matrices.
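
   As an illustrative sketch (the values 2 and 1 are placeholders for a hypothetical
   block-tridiagonal problem, not a recommendation), one might preallocate with
.vb
  MatMPISBAIJSetPreallocation(B,bs,2,NULL,1,NULL);
.ve
   reserving two upper-triangular block nonzeros per block row in the diagonal part and one
   block nonzero per block row in the off-diagonal part.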
2369 
2370    Level: intermediate
2371 
2372 .keywords: matrix, block, aij, compressed row, sparse, parallel
2373 
2374 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2375 @*/
2376 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2377 {
2378   PetscErrorCode ierr;
2379 
2380   PetscFunctionBegin;
2381   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2382   PetscValidType(B,1);
2383   PetscValidLogicalCollectiveInt(B,bs,2);
2384   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 /*@C
2389    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2390    (block compressed row).  For good matrix assembly performance
2391    the user should preallocate the matrix storage by setting the parameters
2392    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2393    performance can be increased by more than a factor of 50.
2394 
2395    Collective on MPI_Comm
2396 
2397    Input Parameters:
2398 +  comm - MPI communicator
2399 .  bs   - size of block; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2400           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2401 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2402            This value should be the same as the local size used in creating the
2403            y vector for the matrix-vector product y = Ax.
2404 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2405            This value should be the same as the local size used in creating the
2406            x vector for the matrix-vector product y = Ax.
2407 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2408 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2409 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2410            submatrix  (same for all local rows)
2411 .  d_nnz - array containing the number of block nonzeros in the various block rows
2412            in the upper triangular portion of the diagonal portion of the local submatrix
2413            (possibly different for each block row) or NULL.
2414            If you plan to factor the matrix you must leave room for the diagonal entry and
2415            set its value even if it is zero.
2416 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2417            submatrix (same for all local rows).
2418 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2419            off-diagonal portion of the local submatrix (possibly different for
2420            each block row) or NULL.
2421 
2422    Output Parameter:
2423 .  A - the matrix
2424 
2425    Options Database Keys:
2426 +   -mat_no_unroll - uses code that does not unroll the loops in the
2427                      block calculations (much slower)
2428 .   -mat_block_size - size of the blocks to use
2429 -   -mat_mpi - use the parallel matrix data structures even on one processor
2430                (defaults to using SeqBAIJ format on one processor)
2431 
2432    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2433    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2434    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2435 
2436    Notes:
2437    The number of rows and columns must be divisible by blocksize.
2438    This matrix type does not support complex Hermitian operation.
2439 
2440    The user MUST specify either the local or global matrix dimensions
2441    (possibly both).
2442 
2443    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2444    then it must be used on all processors that share the object for that argument.
2445 
2446    If the *_nnz parameter is given then the *_nz parameter is ignored
2447 
2448    Storage Information:
2449    For a square global matrix we define each processor's diagonal portion
2450    to be its local rows and the corresponding columns (a square submatrix);
2451    each processor's off-diagonal portion encompasses the remainder of the
2452    local matrix (a rectangular submatrix).
2453 
2454    The user can specify preallocated storage for the diagonal part of
2455    the local submatrix with either d_nz or d_nnz (not both).  Set
2456    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2457    memory allocation.  Likewise, specify preallocated storage for the
2458    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2459 
2460    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2461    the figure below we depict these three local rows and all columns (0-11).
2462 
2463 .vb
2464            0 1 2 3 4 5 6 7 8 9 10 11
2465           --------------------------
2466    row 3  |. . . d d d o o o o  o  o
2467    row 4  |. . . d d d o o o o  o  o
2468    row 5  |. . . d d d o o o o  o  o
2469           --------------------------
2470 .ve
2471 
2472    Thus, any entries in the d locations are stored in the d (diagonal)
2473    submatrix, and any entries in the o locations are stored in the
2474    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2475    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2476 
2477    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2478    plus the diagonal part of the d matrix,
2479    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2480    In general, for PDE problems in which most nonzeros are near the diagonal,
2481    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2482    or you will get TERRIBLE performance; see the users' manual chapter on
2483    matrices.
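
   An illustrative creation sketch (comm, bs, m, n, M, N, d_nz, and o_nz are placeholders to be
   chosen for the application at hand):
.vb
  Mat A;
  MatCreateSBAIJ(comm,bs,m,n,M,N,d_nz,NULL,o_nz,NULL,&A);
  /* ... MatSetValuesBlocked(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve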
2484 
2485    Level: intermediate
2486 
2487 .keywords: matrix, block, aij, compressed row, sparse, parallel
2488 
2489 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2490 @*/
2491 
2492 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2493 {
2494   PetscErrorCode ierr;
2495   PetscMPIInt    size;
2496 
2497   PetscFunctionBegin;
2498   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2499   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2500   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2501   if (size > 1) {
2502     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2503     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2504   } else {
2505     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2506     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2507   }
2508   PetscFunctionReturn(0);
2509 }
2510 
2511 
2512 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2513 {
2514   Mat            mat;
2515   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2516   PetscErrorCode ierr;
2517   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2518   PetscScalar    *array;
2519 
2520   PetscFunctionBegin;
2521   *newmat = 0;
2522 
2523   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2524   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2525   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2526   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2527   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2528   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2529 
2530   mat->factortype   = matin->factortype;
2531   mat->preallocated = PETSC_TRUE;
2532   mat->assembled    = PETSC_TRUE;
2533   mat->insertmode   = NOT_SET_VALUES;
2534 
2535   a      = (Mat_MPISBAIJ*)mat->data;
2536   a->bs2 = oldmat->bs2;
2537   a->mbs = oldmat->mbs;
2538   a->nbs = oldmat->nbs;
2539   a->Mbs = oldmat->Mbs;
2540   a->Nbs = oldmat->Nbs;
2541 
2542 
2543   a->size         = oldmat->size;
2544   a->rank         = oldmat->rank;
2545   a->donotstash   = oldmat->donotstash;
2546   a->roworiented  = oldmat->roworiented;
2547   a->rowindices   = 0;
2548   a->rowvalues    = 0;
2549   a->getrowactive = PETSC_FALSE;
2550   a->barray       = 0;
2551   a->rstartbs     = oldmat->rstartbs;
2552   a->rendbs       = oldmat->rendbs;
2553   a->cstartbs     = oldmat->cstartbs;
2554   a->cendbs       = oldmat->cendbs;
2555 
2556   /* hash table stuff */
2557   a->ht           = 0;
2558   a->hd           = 0;
2559   a->ht_size      = 0;
2560   a->ht_flag      = oldmat->ht_flag;
2561   a->ht_fact      = oldmat->ht_fact;
2562   a->ht_total_ct  = 0;
2563   a->ht_insert_ct = 0;
2564 
2565   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2566   if (oldmat->colmap) {
2567 #if defined(PETSC_USE_CTABLE)
2568     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2569 #else
2570     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2571     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2572     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2573 #endif
2574   } else a->colmap = 0;
2575 
2576   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2577     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2578     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2579     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2580   } else a->garray = 0;
2581 
2582   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2583   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2584   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2585   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2586   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2587 
2588   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2589   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2590   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2591   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2592 
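  /* slvec1a/slvec1b are views into slvec1's storage (local part first, ghost part after),
     and slvec0b is the corresponding ghost-part view into slvec0 */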
2593   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2594   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2595   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2596   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2597   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2598   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2599   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2600   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2601   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2602   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2603   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2604   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2605   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2606 
2607   /* VecScatterCopy() for oldmat->sMvctx is not written yet, so instead take a reference to the old scatter (the lazy approach): */
2608   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2609   a->sMvctx = oldmat->sMvctx;
2610   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2611 
2612   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2613   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2614   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2615   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2616   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2617   *newmat = mat;
2618   PetscFunctionReturn(0);
2619 }
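/*
  Example: duplicating an assembled MPISBAIJ matrix through the public
  MatDuplicate() interface, which dispatches to the routine above. A minimal
  sketch; A is assumed to be an already assembled Mat and ierr a
  PetscErrorCode, as in the surrounding code.

    Mat B;
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);           (copies nonzero structure and values)
    ierr = MatDestroy(&B);CHKERRQ(ierr);
    ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);    (copies nonzero structure only)
    ierr = MatDestroy(&B);CHKERRQ(ierr);
*/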
2620 
2621 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2622 {
2623   PetscErrorCode ierr;
2624   PetscInt       i,nz,j,rstart,rend;
2625   PetscScalar    *vals,*buf;
2626   MPI_Comm       comm;
2627   MPI_Status     status;
2628   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2629   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2630   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2631   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2632   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2633   PetscInt       dcount,kmax,k,nzcount,tmp;
2634   int            fd;
2635 
2636   PetscFunctionBegin;
2637   /* force binary viewer to load .info file if it has not yet done so */
2638   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2639   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2640   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2641   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2642   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2643   if (bs < 0) bs = 1;
2644 
2645   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2646   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2647   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2648   if (!rank) {
2649     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2650     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2651     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2652   }
2653 
2654   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2655   M    = header[1];
2656   N    = header[2];
2657 
2658   /* If global sizes are set, check if they are consistent with that given in the file */
2659   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2660   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2661 
2662   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2663 
2664   /*
2665      This code adds extra rows to make sure the number of rows is
2666      divisible by the blocksize
2667   */
2668   Mbs        = M/bs;
2669   extra_rows = bs - M + bs*(Mbs);
2670   if (extra_rows == bs) extra_rows = 0;
2671   else                  Mbs++;
2672   if (extra_rows && !rank) {
2673     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2674   }
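  /* Worked example of the padding above (an illustration, not code): with
     M = 10 and bs = 4, Mbs = 10/4 = 2 and extra_rows = 4 - 10 + 8 = 2, so
     Mbs is bumped to 3 and the matrix is loaded as if it had 12 rows; the
     padded rows receive unit diagonal entries further below. */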
2675 
2676   /* determine ownership of all rows */
2677   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2678     mbs = Mbs/size + ((Mbs % size) > rank);
2679     m   = mbs*bs;
2680   } else { /* User Set */
2681     m   = newmat->rmap->n;
2682     mbs = m/bs;
2683   }
2684   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2685   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2686   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2687   rowners[0] = 0;
2688   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2689   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2690   rstart = rowners[rank];
2691   rend   = rowners[rank+1];
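  /* Example of the PETSC_DECIDE split above: Mbs = 10 block rows on size = 3
     processes gives mbs = 4, 3, 3 on ranks 0, 1, 2; the first Mbs % size
     ranks get one extra block row. */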
2692 
2693   /* distribute row lengths to all processors */
2694   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2695   if (!rank) {
2696     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2697     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2698     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2699     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2700     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2701     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2702     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2703   } else {
2704     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2705   }
2706 
2707   if (!rank) {   /* procs[0] */
2708     /* calculate the number of nonzeros on each processor */
2709     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2710     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2711     for (i=0; i<size; i++) {
2712       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2713         procsnz[i] += rowlengths[j];
2714       }
2715     }
2716     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2717 
2718     /* determine max buffer needed and allocate it */
2719     maxnz = 0;
2720     for (i=0; i<size; i++) {
2721       maxnz = PetscMax(maxnz,procsnz[i]);
2722     }
2723     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2724 
2725     /* read in my part of the matrix column indices  */
2726     nz     = procsnz[0];
2727     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2728     mycols = ibuf;
2729     if (size == 1) nz -= extra_rows;
2730     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2731     if (size == 1) {
2732       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2733     }
2734 
2735     /* read in the parts of all other processes (except the last) and ship them off */
2736     for (i=1; i<size-1; i++) {
2737       nz   = procsnz[i];
2738       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2739       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2740     }
2741     /* read in the data for the last proc */
2742     if (size != 1) {
2743       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2744       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2745       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2746       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2747     }
2748     ierr = PetscFree(cols);CHKERRQ(ierr);
2749   } else {  /* procs[i], i>0 */
2750     /* determine buffer space needed for message */
2751     nz = 0;
2752     for (i=0; i<m; i++) nz += locrowlens[i];
2753     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2754     mycols = ibuf;
2755     /* receive message of column indices */
2756     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2757     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2758     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2759   }
2760 
2761   /* loop over local rows, determining the number of diagonal and off-diagonal block entries */
2762   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2763   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2764   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2765   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2766   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2767   rowcount = 0;
2768   nzcount  = 0;
2769   for (i=0; i<mbs; i++) {
2770     dcount  = 0;
2771     odcount = 0;
2772     for (j=0; j<bs; j++) {
2773       kmax = locrowlens[rowcount];
2774       for (k=0; k<kmax; k++) {
2775         tmp = mycols[nzcount++]/bs; /* block col. index */
2776         if (!mask[tmp]) {
2777           mask[tmp] = 1;
2778           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2779           else masked1[dcount++] = tmp; /* entry in diag portion */
2780         }
2781       }
2782       rowcount++;
2783     }
2784 
2785     dlens[i]  = dcount;  /* d_nzz[i] */
2786     odlens[i] = odcount; /* o_nzz[i] */
2787 
2788     /* zero out the mask elements we set */
2789     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2790     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2791   }
2792   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2793   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2794   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2795 
2796   if (!rank) {
2797     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2798     /* read in my part of the matrix numerical values  */
2799     nz     = procsnz[0];
2800     vals   = buf;
2801     mycols = ibuf;
2802     if (size == 1) nz -= extra_rows;
2803     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2804     if (size == 1) {
2805       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2806     }
2807 
2808     /* insert into matrix */
2809     jj = rstart*bs;
2810     for (i=0; i<m; i++) {
2811       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2812       mycols += locrowlens[i];
2813       vals   += locrowlens[i];
2814       jj++;
2815     }
2816 
2817     /* read in other processors (except the last one) and ship out */
2818     for (i=1; i<size-1; i++) {
2819       nz   = procsnz[i];
2820       vals = buf;
2821       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2822       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2823     }
2824     /* the last proc */
2825     if (size != 1) {
2826       nz   = procsnz[i] - extra_rows;  /* i == size-1 here, left over from the loop above; the extra rows are not on disk */
2827       vals = buf;
2828       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2829       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2830       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2831     }
2832     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2833 
2834   } else {
2835     /* receive numeric values */
2836     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2837 
2838     /* receive message of values */
2839     vals   = buf;
2840     mycols = ibuf;
2841     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2842     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2843     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2844 
2845     /* insert into matrix */
2846     jj = rstart*bs;
2847     for (i=0; i<m; i++) {
2848       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2849       mycols += locrowlens[i];
2850       vals   += locrowlens[i];
2851       jj++;
2852     }
2853   }
2854 
2855   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2856   ierr = PetscFree(buf);CHKERRQ(ierr);
2857   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2858   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2859   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2860   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2861   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2863   PetscFunctionReturn(0);
2864 }
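/*
  Example: reaching MatLoad_MPISBAIJ() through the public interface. A
  minimal sketch; "matrix.dat" is a hypothetical PETSc binary file holding a
  symmetric matrix.

    Mat         A;
    PetscViewer viewer;
    ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatLoad(A,viewer);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

  The block size can be selected at load time with -matload_block_size <bs>,
  which is read by the options block at the top of the routine above.
*/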
2865 
2866 /*XXXXX@
2867    MatMPISBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2868 
2869    Not Collective on Mat, each process can have a different hash factor
2870 
2871    Input Parameters:
2872 +  mat  - the matrix
2873 -  fact - the hash table size factor
2874 
2875    Level: advanced
2876 
2877    Notes:
2878    This can also be set with the command line option: -mat_use_hash_table <fact>
2879 
2880 .keywords: matrix, hashtable, factor, HT
2881 
2882 .seealso: MatSetOption()
2883 @XXXXX*/
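/*
  Example of enabling the hash-table assembly path whose factor is described
  above. A minimal sketch; mat is a hypothetical MPISBAIJ matrix, and the
  option sets the ht_flag field copied in MatDuplicate() earlier in this file.

    ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);

  or, equivalently, on the command line (as per the note above):

    -mat_use_hash_table <fact>
*/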
2884 
2885 
2886 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2887 {
2888   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2889   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2890   PetscReal      atmp;
2891   PetscReal      *work,*svalues,*rvalues;
2892   PetscErrorCode ierr;
2893   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2894   PetscMPIInt    rank,size;
2895   PetscInt       *rowners_bs,dest,count,source;
2896   PetscScalar    *va;
2897   MatScalar      *ba;
2898   MPI_Status     stat;
2899 
2900   PetscFunctionBegin;
2901   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2902   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2903   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2904 
2905   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2906   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2907 
2908   bs  = A->rmap->bs;
2909   mbs = a->mbs;
2910   Mbs = a->Mbs;
2911   ba  = b->a;
2912   bi  = b->i;
2913   bj  = b->j;
2914 
2915   /* find ownerships */
2916   rowners_bs = A->rmap->range;
2917 
2918   /* each proc creates an array to be distributed */
2919   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2920   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2921 
2922   /* row_max for B */
2923   if (rank != size-1) { /* the last process stores no upper off-diagonal block B, so it has nothing to contribute */
2924     for (i=0; i<mbs; i++) {
2925       ncols = bi[1] - bi[0]; bi++;
2926       brow  = bs*i;
2927       for (j=0; j<ncols; j++) {
2928         bcol = bs*(*bj);
2929         for (kcol=0; kcol<bs; kcol++) {
2930           col  = bcol + kcol;                /* local col index */
2931           col += rowners_bs[rank+1];      /* global col index */
2932           for (krow=0; krow<bs; krow++) {
2933             atmp = PetscAbsScalar(*ba); ba++;
2934             row  = brow + krow;   /* local row index */
2935             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2936             if (work[col] < atmp) work[col] = atmp;
2937           }
2938         }
2939         bj++;
2940       }
2941     }
2942 
2943     /* send values to its owners */
2944     for (dest=rank+1; dest<size; dest++) {
2945       svalues = work + rowners_bs[dest];
2946       count   = rowners_bs[dest+1]-rowners_bs[dest];
2947       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2948     }
2949   }
2950 
2951   /* receive values */
2952   if (rank) {
2953     rvalues = work;
2954     count   = rowners_bs[rank+1]-rowners_bs[rank];
2955     for (source=0; source<rank; source++) {
2956       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2957       /* process values */
2958       for (i=0; i<count; i++) {
2959         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2960       }
2961     }
2962   }
2963 
2964   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2965   ierr = PetscFree(work);CHKERRQ(ierr);
2966   PetscFunctionReturn(0);
2967 }
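/*
  Example: calling the routine above through the public MatGetRowMaxAbs()
  interface. A minimal sketch; A is a hypothetical assembled MPISBAIJ matrix.
  Note that this implementation requires idx to be NULL.

    Vec v;
    ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);     (left vector: same row layout as A)
    ierr = MatGetRowMaxAbs(A,v,NULL);CHKERRQ(ierr);    (v(i) = max_j abs(A(i,j)))
    ierr = VecDestroy(&v);CHKERRQ(ierr);
*/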
2968 
2969 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2970 {
2971   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2972   PetscErrorCode    ierr;
2973   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2974   PetscScalar       *x,*ptr,*from;
2975   Vec               bb1;
2976   const PetscScalar *b;
2977 
2978   PetscFunctionBegin;
2979   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2980   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2981 
2982   if (flag == SOR_APPLY_UPPER) {
2983     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2984     PetscFunctionReturn(0);
2985   }
2986 
2987   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2988     if (flag & SOR_ZERO_INITIAL_GUESS) {
2989       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2990       its--;
2991     }
2992 
2993     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2994     while (its--) {
2995 
2996       /* lower triangular part: slvec0b = - B^T*xx */
2997       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2998 
2999       /* copy xx into slvec0a */
3000       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3001       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3002       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3003       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3004 
3005       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3006 
3007       /* copy bb into slvec1a */
3008       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3009       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3010       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3011       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3012 
3013       /* set slvec1b = 0 */
3014       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3015 
3016       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3017       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3018       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3019       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3020 
3021       /* upper triangular part: bb1 = slvec1a + B*slvec1b, accumulating all off-diagonal contributions into bb */
3022       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3023 
3024       /* local diagonal sweep */
3025       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3026     }
3027     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3028   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3029     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3030   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3031     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3032   } else if (flag & SOR_EISENSTAT) {
3033     Vec               xx1;
3034     PetscBool         hasop;
3035     const PetscScalar *diag;
3036     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3037     PetscInt          i,n;
3038 
3039     if (!mat->xx1) {
3040       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3041       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3042     }
3043     xx1 = mat->xx1;
3044     bb1 = mat->bb1;
3045 
3046     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3047 
3048     if (!mat->diag) {
3049       /* note: this cached diagonal becomes stale if the same matrix is reassembled with new values */
3050       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3051       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3052     }
3053     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3054 
3055     if (hasop) {
3056       ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr); /* the result must land in slvec1a, as in the else branch below */
3057       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3058     } else {
3059       /*
3060           The following two lines are replaced by the hand-written loop below, which may be a bit faster with a good compiler:
3061       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3062       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3063       */
3064       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3065       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3066       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3067       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3068       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3069       if (omega == 1.0) {
3070         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3071         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3072       } else {
3073         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3074         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3075       }
3076       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3077       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3078       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3079       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3080     }
3081 
3082     /* multiply off-diagonal portion of matrix */
3083     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3084     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3085     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3086     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3087     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3088     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3089     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3090     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3091     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3092     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3093 
3094     /* local sweep */
3095     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3096     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3097   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3098   PetscFunctionReturn(0);
3099 }
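/*
  Example: the SOR kernel above is normally reached through the PCSOR
  preconditioner. A minimal sketch; A, b, x are hypothetical, and only
  bs = 1 with the local symmetric, local forward/backward (zero initial
  guess), and Eisenstat variants are supported by this implementation.

    KSP ksp;
    PC  pc;
    ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
    ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
    ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
    ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
    ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
    ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
    ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
*/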
3100 
3101 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3102 {
3103   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3104   PetscErrorCode ierr;
3105   Vec            lvec1,bb1;
3106 
3107   PetscFunctionBegin;
3108   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3109   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3110 
3111   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3112     if (flag & SOR_ZERO_INITIAL_GUESS) {
3113       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3114       its--;
3115     }
3116 
3117     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3118     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3119     while (its--) {
3120       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3121 
3122       /* lower diagonal part: bb1 = bb - B^T*xx */
3123       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3124       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3125 
3126       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3127       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3128       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3129 
3130       /* upper diagonal part: bb1 = bb1 - B*x */
3131       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3132       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3133 
3134       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3135 
3136       /* diagonal sweep */
3137       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3138     }
3139     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3140     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3141   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3142   PetscFunctionReturn(0);
3143 }
3144 
3145 /*@
3146      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
3147          in standard CSR format.
3148 
3149    Collective on MPI_Comm
3150 
3151    Input Parameters:
3152 +  comm - MPI communicator
3153 .  bs - the block size, only a block size of 1 is supported
3154 .  m - number of local rows (Cannot be PETSC_DECIDE)
3155 .  n - This value should be the same as the local size used in creating the
3156        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3157        calculated if N is given). For square matrices n is almost always m.
3158 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3159 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3160 .   i - row indices
3161 .   j - column indices
3162 -   a - matrix values
3163 
3164    Output Parameter:
3165 .   mat - the matrix
3166 
3167    Level: intermediate
3168 
3169    Notes:
3170        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3171      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3172      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3173 
3174        The i and j indices are 0 based; i[row] gives the offset into the local j array at which the column indices of that row begin.
3175 
3176 .keywords: matrix, aij, compressed row, sparse, parallel
3177 
3178 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3179           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3180 @*/
3181 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3182 {
3183   PetscErrorCode ierr;
3184 
3185 
3186   PetscFunctionBegin;
3187   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3188   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3189   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3190   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3191   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3192   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3193   PetscFunctionReturn(0);
3194 }
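/*
  Example for MatCreateMPISBAIJWithArrays() above: the 2x2 symmetric matrix
  [2 1; 1 2] in CSR form on a single process. A minimal sketch; only the
  upper triangular part of each row is supplied, and bs must be 1.

    PetscInt    ia[] = {0,2,3};            (row offsets)
    PetscInt    ja[] = {0,1,1};            (upper-triangular column indices)
    PetscScalar va[] = {2.0,1.0,2.0};
    Mat         A;
    ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&A);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
*/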
3195 
3196 
3197 /*@C
3198    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
3199    (block compressed row storage of the upper triangular part).
3200 
3201    Collective on MPI_Comm
3202 
3203    Input Parameters:
3204 +  B - the matrix
3205 .  bs - the block size
3206 .  i - the indices into j for the start of each local row (starts with zero)
3207 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
3208 -  v - optional values in the matrix
3209 
3210    Level: developer
3211 
3212 .keywords: matrix, aij, compressed row, sparse, parallel
3213 
3214 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3215 @*/
3216 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3217 {
3218   PetscErrorCode ierr;
3219 
3220   PetscFunctionBegin;
3221   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3222   PetscFunctionReturn(0);
3223 }
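/*
  Example: the explicit create/set-type/preallocate sequence that
  MatCreateMPISBAIJWithArrays() wraps. A minimal sketch with the same
  hypothetical CSR data (ia, ja, va) as in the example above.

    Mat B;
    ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,2,2,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocationCSR(B,1,ia,ja,va);CHKERRQ(ierr);
    ierr = MatDestroy(&B);CHKERRQ(ierr);
*/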
3224 
3225 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3226 {
3227   PetscErrorCode ierr;
3228   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3229   PetscInt       *indx;
3230   PetscScalar    *values;
3231 
3232   PetscFunctionBegin;
3233   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3234   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3235     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3236     PetscInt       *dnz,*onz,sum,mbs,Nbs; /* bs and cbs from the enclosing scope are set just below */
3237     PetscInt       *bindx,rmax=a->rmax,j;
3238 
3239     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3240     mbs = m/bs; Nbs = N/cbs;
3241     if (n == PETSC_DECIDE) {
3242       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3243     }
3244     /* Check sum(n) = Nbs */
3245     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3246     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns != global block columns %D",Nbs);
3247 
3248     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3249     rstart -= mbs;
3250 
3251     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3252     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3253     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3254     for (i=0; i<mbs; i++) {
3255       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3256       nnz = nnz/bs;
3257       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3258       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3259       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3260     }
3261     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3262     ierr = PetscFree(bindx);CHKERRQ(ierr);
3263 
3264     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3265     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3266     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3267     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3268     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3269     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3270   }
3271 
3272   /* numeric phase */
3273   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3274   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3275 
3276   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3277   for (i=0; i<m; i++) {
3278     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3279     Ii   = i + rstart;
3280     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3281     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3282   }
3283   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3284   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3285   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286   PetscFunctionReturn(0);
3287 }
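/*
  Example: the routine above is reached through the public
  MatCreateMPIMatConcatenateSeqMat() interface, which stacks each process's
  sequential matrix in rank order. A minimal sketch; seqA is a hypothetical
  assembled SeqSBAIJ matrix on every rank.

    Mat mpiA;
    ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
    ierr = MatDestroy(&mpiA);CHKERRQ(ierr);
*/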
3288