xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 5e71baeff2f3138f93cd4f5927dfd596eb8325cc)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
11 {
12   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
17   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
18   PetscFunctionReturn(0);
19 }
20 
21 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
22 {
23   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
28   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
29   PetscFunctionReturn(0);
30 }
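
/*
   Usage sketch (illustrative, not part of this file): MatStoreValues()/
   MatRetrieveValues() let a caller save and later restore the numerical
   values of a matrix whose nonzero structure is frozen, e.g.

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     (... modify entries of mat and use it ...)
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);

   The MPI variants above simply forward to the sequential diagonal (A)
   and off-diagonal (B) blocks.
*/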
31 
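/*
   The two macros below insert one scalar into the local diagonal block A
   (SeqSBAIJ) or off-diagonal block B (SeqBAIJ): they locate the block
   column with a short binary search plus a linear scan, update the entry
   in place when the block already exists, and otherwise (unless nonew
   forbids new nonzeros) grow the row, shift the trailing blocks up, and
   zero the fresh block before writing the value.
*/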
32 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
33   { \
34  \
35     brow = row/bs;  \
36     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
37     rmax = aimax[brow]; nrow = ailen[brow]; \
38     bcol = col/bs; \
39     ridx = row % bs; cidx = col % bs; \
40     low  = 0; high = nrow; \
41     while (high-low > 3) { \
42       t = (low+high)/2; \
43       if (rp[t] > bcol) high = t; \
44       else              low  = t; \
45     } \
46     for (_i=low; _i<high; _i++) { \
47       if (rp[_i] > bcol) break; \
48       if (rp[_i] == bcol) { \
49         bap = ap + bs2*_i + bs*cidx + ridx; \
50         if (addv == ADD_VALUES) *bap += value;  \
51         else                    *bap  = value;  \
52         goto a_noinsert; \
53       } \
54     } \
55     if (a->nonew == 1) goto a_noinsert; \
56     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
57     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
58     N = nrow++ - 1;  \
59     /* shift up all the later entries in this row */ \
60     for (ii=N; ii>=_i; ii--) { \
61       rp[ii+1] = rp[ii]; \
62       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
63     } \
64     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
65     rp[_i]                      = bcol;  \
66     ap[bs2*_i + bs*cidx + ridx] = value;  \
67     A->nonzerostate++;\
68 a_noinsert:; \
69     ailen[brow] = nrow; \
70   }
71 
72 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
73   { \
74     brow = row/bs;  \
75     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
76     rmax = bimax[brow]; nrow = bilen[brow]; \
77     bcol = col/bs; \
78     ridx = row % bs; cidx = col % bs; \
79     low  = 0; high = nrow; \
80     while (high-low > 3) { \
81       t = (low+high)/2; \
82       if (rp[t] > bcol) high = t; \
83       else              low  = t; \
84     } \
85     for (_i=low; _i<high; _i++) { \
86       if (rp[_i] > bcol) break; \
87       if (rp[_i] == bcol) { \
88         bap = ap + bs2*_i + bs*cidx + ridx; \
89         if (addv == ADD_VALUES) *bap += value;  \
90         else                    *bap  = value;  \
91         goto b_noinsert; \
92       } \
93     } \
94     if (b->nonew == 1) goto b_noinsert; \
95     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
96     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
97     N = nrow++ - 1;  \
98     /* shift up all the later entries in this row */ \
99     for (ii=N; ii>=_i; ii--) { \
100       rp[ii+1] = rp[ii]; \
101       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
102     } \
103     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
104     rp[_i]                      = bcol;  \
105     ap[bs2*_i + bs*cidx + ridx] = value;  \
106     B->nonzerostate++;\
107 b_noinsert:; \
108     bilen[brow] = nrow; \
109   }
110 
111 /* Only add/insert a(i,j) with i<=j (blocks).
112    Any a(i,j) with i>j input by the user is ignored.
113 */
114 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
115 {
116   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
117   MatScalar      value;
118   PetscBool      roworiented = baij->roworiented;
119   PetscErrorCode ierr;
120   PetscInt       i,j,row,col;
121   PetscInt       rstart_orig=mat->rmap->rstart;
122   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
123   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
124 
125   /* Some variables required by the macros */
126   Mat          A     = baij->A;
127   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
128   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
129   MatScalar    *aa   =a->a;
130 
131   Mat         B     = baij->B;
132   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
133   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
134   MatScalar   *ba   =b->a;
135 
136   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
137   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
138   MatScalar *ap,*bap;
139 
140   /* for stash */
141   PetscInt  n_loc, *in_loc = NULL;
142   MatScalar *v_loc = NULL;
143 
144   PetscFunctionBegin;
145   if (!baij->donotstash) {
146     if (n > baij->n_loc) {
147       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
148       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
149       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
150       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
151 
152       baij->n_loc = n;
153     }
154     in_loc = baij->in_loc;
155     v_loc  = baij->v_loc;
156   }
157 
158   for (i=0; i<m; i++) {
159     if (im[i] < 0) continue;
160 #if defined(PETSC_USE_DEBUG)
161     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
162 #endif
163     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
164       row = im[i] - rstart_orig;              /* local row index */
165       for (j=0; j<n; j++) {
166         if (im[i]/bs > in[j]/bs) {
167           if (a->ignore_ltriangular) {
168             continue;    /* ignore lower triangular blocks */
169           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
170         }
171         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
172           col  = in[j] - cstart_orig;         /* local col index */
173           brow = row/bs; bcol = col/bs;
174           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
175           if (roworiented) value = v[i*n+j];
176           else             value = v[i+j*m];
177           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
178           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
179         } else if (in[j] < 0) continue;
180 #if defined(PETSC_USE_DEBUG)
181         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
182 #endif
183         else {  /* off-diag entry (B) */
184           if (mat->was_assembled) {
185             if (!baij->colmap) {
186               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
187             }
188 #if defined(PETSC_USE_CTABLE)
189             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
190             col  = col - 1;
191 #else
192             col = baij->colmap[in[j]/bs] - 1;
193 #endif
194             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
195               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
196               col  =  in[j];
197               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
198               B    = baij->B;
199               b    = (Mat_SeqBAIJ*)(B)->data;
200               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
201               ba   = b->a;
202             } else col += in[j]%bs;
203           } else col = in[j];
204           if (roworiented) value = v[i*n+j];
205           else             value = v[i+j*m];
206           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
207           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
208         }
209       }
210     } else {  /* off processor entry */
211       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
212       if (!baij->donotstash) {
213         mat->assembled = PETSC_FALSE;
214         n_loc          = 0;
215         for (j=0; j<n; j++) {
216           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
217           in_loc[n_loc] = in[j];
218           if (roworiented) {
219             v_loc[n_loc] = v[i*n+j];
220           } else {
221             v_loc[n_loc] = v[j*m+i];
222           }
223           n_loc++;
224         }
225         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
226       }
227     }
228   }
229   PetscFunctionReturn(0);
230 }
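
/*
   Usage sketch (illustrative, not from this file): MATMPISBAIJ stores only
   the upper triangle, so callers pass entries with i <= j, e.g.

     PetscInt    row     = 1, cols[2] = {1,3};
     PetscScalar vals[2] = {4.0,-1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   Entries with i > j are dropped or raise an error depending on
   MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,...), as enforced above.
*/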
231 
232 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
233 {
234   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
235   PetscErrorCode    ierr;
236   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
237   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
238   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
239   PetscBool         roworiented=a->roworiented;
240   const PetscScalar *value     = v;
241   MatScalar         *ap,*aa = a->a,*bap;
242 
243   PetscFunctionBegin;
244   if (col < row) {
245     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
246     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
247   }
248   rp   = aj + ai[row];
249   ap   = aa + bs2*ai[row];
250   rmax = imax[row];
251   nrow = ailen[row];
252   value = v;
253   low   = 0;
254   high  = nrow;
255 
256   while (high-low > 7) {
257     t = (low+high)/2;
258     if (rp[t] > col) high = t;
259     else             low  = t;
260   }
261   for (i=low; i<high; i++) {
262     if (rp[i] > col) break;
263     if (rp[i] == col) {
264       bap = ap +  bs2*i;
265       if (roworiented) {
266         if (is == ADD_VALUES) {
267           for (ii=0; ii<bs; ii++) {
268             for (jj=ii; jj<bs2; jj+=bs) {
269               bap[jj] += *value++;
270             }
271           }
272         } else {
273           for (ii=0; ii<bs; ii++) {
274             for (jj=ii; jj<bs2; jj+=bs) {
275               bap[jj] = *value++;
276             }
277           }
278         }
279       } else {
280         if (is == ADD_VALUES) {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=0; jj<bs; jj++) {
283               *bap++ += *value++;
284             }
285           }
286         } else {
287           for (ii=0; ii<bs; ii++) {
288             for (jj=0; jj<bs; jj++) {
289               *bap++  = *value++;
290             }
291           }
292         }
293       }
294       goto noinsert2;
295     }
296   }
297   if (nonew == 1) goto noinsert2;
298   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block row/column (%D, %D) in the matrix", orow, ocol);
299   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
300   N = nrow++ - 1; high++;
301   /* shift up all the later entries in this row */
302   for (ii=N; ii>=i; ii--) {
303     rp[ii+1] = rp[ii];
304     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
305   }
306   if (N >= i) {
307     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
308   }
309   rp[i] = col;
310   bap   = ap +  bs2*i;
311   if (roworiented) {
312     for (ii=0; ii<bs; ii++) {
313       for (jj=ii; jj<bs2; jj+=bs) {
314         bap[jj] = *value++;
315       }
316     }
317   } else {
318     for (ii=0; ii<bs; ii++) {
319       for (jj=0; jj<bs; jj++) {
320         *bap++ = *value++;
321       }
322     }
323   }
324   noinsert2:;
325   ailen[row] = nrow;
326   PetscFunctionReturn(0);
327 }
328 
329 /*
330    This routine is exactly duplicated in mpibaij.c
331 */
332 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
333 {
334   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
335   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
336   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
337   PetscErrorCode    ierr;
338   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
339   PetscBool         roworiented=a->roworiented;
340   const PetscScalar *value     = v;
341   MatScalar         *ap,*aa = a->a,*bap;
342 
343   PetscFunctionBegin;
344   rp   = aj + ai[row];
345   ap   = aa + bs2*ai[row];
346   rmax = imax[row];
347   nrow = ailen[row];
348   low  = 0;
349   high = nrow;
350   value = v;
351   while (high-low > 7) {
352     t = (low+high)/2;
353     if (rp[t] > col) high = t;
354     else             low  = t;
355   }
356   for (i=low; i<high; i++) {
357     if (rp[i] > col) break;
358     if (rp[i] == col) {
359       bap = ap +  bs2*i;
360       if (roworiented) {
361         if (is == ADD_VALUES) {
362           for (ii=0; ii<bs; ii++) {
363             for (jj=ii; jj<bs2; jj+=bs) {
364               bap[jj] += *value++;
365             }
366           }
367         } else {
368           for (ii=0; ii<bs; ii++) {
369             for (jj=ii; jj<bs2; jj+=bs) {
370               bap[jj] = *value++;
371             }
372           }
373         }
374       } else {
375         if (is == ADD_VALUES) {
376           for (ii=0; ii<bs; ii++,value+=bs) {
377             for (jj=0; jj<bs; jj++) {
378               bap[jj] += value[jj];
379             }
380             bap += bs;
381           }
382         } else {
383           for (ii=0; ii<bs; ii++,value+=bs) {
384             for (jj=0; jj<bs; jj++) {
385               bap[jj]  = value[jj];
386             }
387             bap += bs;
388           }
389         }
390       }
391       goto noinsert2;
392     }
393   }
394   if (nonew == 1) goto noinsert2;
395   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block row/column (%D, %D) in the matrix", orow, ocol);
396   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
397   N = nrow++ - 1; high++;
398   /* shift up all the later entries in this row */
399   for (ii=N; ii>=i; ii--) {
400     rp[ii+1] = rp[ii];
401     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
402   }
403   if (N >= i) {
404     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
405   }
406   rp[i] = col;
407   bap   = ap +  bs2*i;
408   if (roworiented) {
409     for (ii=0; ii<bs; ii++) {
410       for (jj=ii; jj<bs2; jj+=bs) {
411         bap[jj] = *value++;
412       }
413     }
414   } else {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=0; jj<bs; jj++) {
417         *bap++ = *value++;
418       }
419     }
420   }
421   noinsert2:;
422   ailen[row] = nrow;
423   PetscFunctionReturn(0);
424 }
425 
426 /*
427     This routine could be optimized by removing the need for the block copy below and passing stride information
428   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
429 */
430 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
431 {
432   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
433   const MatScalar *value;
434   MatScalar       *barray     =baij->barray;
435   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
436   PetscErrorCode  ierr;
437   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
438   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
439   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
440 
441   PetscFunctionBegin;
442   if (!barray) {
443     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
444     baij->barray = barray;
445   }
446 
447   if (roworiented) {
448     stepval = (n-1)*bs;
449   } else {
450     stepval = (m-1)*bs;
451   }
452   for (i=0; i<m; i++) {
453     if (im[i] < 0) continue;
454 #if defined(PETSC_USE_DEBUG)
455     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
456 #endif
457     if (im[i] >= rstart && im[i] < rend) {
458       row = im[i] - rstart;
459       for (j=0; j<n; j++) {
460         if (im[i] > in[j]) {
461           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
462           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
463         }
464         /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block can be used in place; no copy is required */
465         if ((roworiented) && (n == 1)) {
466           barray = (MatScalar*) v + i*bs2;
467         } else if ((!roworiented) && (m == 1)) {
468           barray = (MatScalar*) v + j*bs2;
469         } else { /* Here a copy is required */
470           if (roworiented) {
471             value = v + i*(stepval+bs)*bs + j*bs;
472           } else {
473             value = v + j*(stepval+bs)*bs + i*bs;
474           }
475           for (ii=0; ii<bs; ii++,value+=stepval) {
476             for (jj=0; jj<bs; jj++) {
477               *barray++ = *value++;
478             }
479           }
480           barray -=bs2;
481         }
482 
483         if (in[j] >= cstart && in[j] < cend) {
484           col  = in[j] - cstart;
485           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
486         } else if (in[j] < 0) continue;
487 #if defined(PETSC_USE_DEBUG)
488         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
489 #endif
490         else {
491           if (mat->was_assembled) {
492             if (!baij->colmap) {
493               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
494             }
495 
496 #if defined(PETSC_USE_DEBUG)
497 #if defined(PETSC_USE_CTABLE)
498             { PetscInt data;
499               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
500               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
501             }
502 #else
503             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
504 #endif
505 #endif
506 #if defined(PETSC_USE_CTABLE)
507             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
508             col  = (col - 1)/bs;
509 #else
510             col = (baij->colmap[in[j]] - 1)/bs;
511 #endif
512             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
513               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
514               col  = in[j];
515             }
516           } else col = in[j];
517           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
518         }
519       }
520     } else {
521       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
522       if (!baij->donotstash) {
523         if (roworiented) {
524           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
525         } else {
526           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
527         }
528       }
529     }
530   }
531   PetscFunctionReturn(0);
532 }
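
/*
   Usage sketch (illustrative): MatSetValuesBlocked() takes block indices and
   one dense bs-by-bs array per block; with bs = 2 and the default row
   orientation an upper-triangular block could be inserted as

     PetscInt    brow    = 0, bcol = 1;
     PetscScalar vals[4] = {1.0,2.0,    (first row of the 2x2 block)
                            3.0,4.0};   (second row)
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
*/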
533 
534 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
535 {
536   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
537   PetscErrorCode ierr;
538   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
539   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
540 
541   PetscFunctionBegin;
542   for (i=0; i<m; i++) {
543     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
544     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
545     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
546       row = idxm[i] - bsrstart;
547       for (j=0; j<n; j++) {
548         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
549         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
550         if (idxn[j] >= bscstart && idxn[j] < bscend) {
551           col  = idxn[j] - bscstart;
552           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
553         } else {
554           if (!baij->colmap) {
555             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
556           }
557 #if defined(PETSC_USE_CTABLE)
558           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
559           data--;
560 #else
561           data = baij->colmap[idxn[j]/bs]-1;
562 #endif
563           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
564           else {
565             col  = data + idxn[j]%bs;
566             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567           }
568         }
569       }
570     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
571   }
572   PetscFunctionReturn(0);
573 }
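
/*
   Usage sketch (illustrative): as the error above notes, only locally owned
   entries can be queried, e.g.

     PetscInt    rstart,rend,col;
     PetscScalar val;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     col  = rstart;
     ierr = MatGetValues(mat,1,&rstart,1,&col,&val);CHKERRQ(ierr);
*/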
574 
575 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
576 {
577   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
578   PetscErrorCode ierr;
579   PetscReal      sum[2],*lnorm2;
580 
581   PetscFunctionBegin;
582   if (baij->size == 1) {
583     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
584   } else {
585     if (type == NORM_FROBENIUS) {
586       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
587       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
588       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
589       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
590       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
591       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
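      /* Each off-diagonal block is stored only once (in B) but appears twice in
         the symmetric matrix (as B and B^T), hence the factor of 2 below */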
592       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
593       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
594     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
595       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
596       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
597       PetscReal    *rsum,*rsum2,vabs;
598       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
599       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
600       MatScalar    *v;
601 
602       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
603       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
604       /* Amat */
605       v = amat->a; jj = amat->j;
606       for (brow=0; brow<mbs; brow++) {
607         grow = bs*(rstart + brow);
608         nz   = amat->i[brow+1] - amat->i[brow];
609         for (bcol=0; bcol<nz; bcol++) {
610           gcol = bs*(rstart + *jj); jj++;
611           for (col=0; col<bs; col++) {
612             for (row=0; row<bs; row++) {
613               vabs            = PetscAbsScalar(*v); v++;
614               rsum[gcol+col] += vabs;
615               /* non-diagonal block */
616               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
617             }
618           }
619         }
620         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
621       }
622       /* Bmat */
623       v = bmat->a; jj = bmat->j;
624       for (brow=0; brow<mbs; brow++) {
625         grow = bs*(rstart + brow);
626         nz = bmat->i[brow+1] - bmat->i[brow];
627         for (bcol=0; bcol<nz; bcol++) {
628           gcol = bs*garray[*jj]; jj++;
629           for (col=0; col<bs; col++) {
630             for (row=0; row<bs; row++) {
631               vabs            = PetscAbsScalar(*v); v++;
632               rsum[gcol+col] += vabs;
633               rsum[grow+row] += vabs;
634             }
635           }
636         }
637         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
638       }
639       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640       *norm = 0.0;
641       for (col=0; col<mat->cmap->N; col++) {
642         if (rsum2[col] > *norm) *norm = rsum2[col];
643       }
644       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
645     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
646   }
647   PetscFunctionReturn(0);
648 }
649 
650 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
651 {
652   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
653   PetscErrorCode ierr;
654   PetscInt       nstash,reallocs;
655 
656   PetscFunctionBegin;
657   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
658 
659   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
664   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
665   PetscFunctionReturn(0);
666 }
667 
668 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
671   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
672   PetscErrorCode ierr;
673   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
674   PetscInt       *row,*col;
675   PetscBool      other_disassembled;
676   PetscMPIInt    n;
677   PetscBool      r1,r2,r3;
678   MatScalar      *val;
679 
680   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
681   PetscFunctionBegin;
682   if (!baij->donotstash &&  !mat->nooffprocentries) {
683     while (1) {
684       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
685       if (!flg) break;
686 
687       for (i=0; i<n;) {
688         /* Now identify the consecutive vals belonging to the same row */
689         for (j=i,rstart=row[j]; j<n; j++) {
690           if (row[j] != rstart) break;
691         }
692         if (j < n) ncols = j-i;
693         else       ncols = n-i;
694         /* Now assemble all these values with a single function call */
695         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
696         i    = j;
697       }
698     }
699     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
700     /* Now process the block-stash. Since the values are stashed column-oriented,
701        temporarily clear the roworiented flags, and restore the original flags
702        after the MatSetValuesBlocked() calls */
703     r1 = baij->roworiented;
704     r2 = a->roworiented;
705     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
706 
707     baij->roworiented = PETSC_FALSE;
708     a->roworiented    = PETSC_FALSE;
709 
710     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
711     while (1) {
712       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
713       if (!flg) break;
714 
715       for (i=0; i<n;) {
716         /* Now identify the consecutive vals belonging to the same row */
717         for (j=i,rstart=row[j]; j<n; j++) {
718           if (row[j] != rstart) break;
719         }
720         if (j < n) ncols = j-i;
721         else       ncols = n-i;
722         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
723         i    = j;
724       }
725     }
726     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
727 
728     baij->roworiented = r1;
729     a->roworiented    = r2;
730 
731     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
732   }
733 
734   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
735   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
736 
737   /* determine if any processor has disassembled; if so, we must
738      also disassemble ourselves, in order that we may reassemble. */
739   /*
740      if the nonzero structure of submatrix B cannot change, then no
741      processor can have disassembled, so we can skip this step
742   */
743   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
744     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
745     if (mat->was_assembled && !other_disassembled) {
746       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
747     }
748   }
749 
750   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
751     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
752   }
753   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
754   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
755 
756   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
757 
758   baij->rowvalues = 0;
759 
760   /* if no new nonzero locations are allowed in the matrix then only set the nonzero state the first time through */
761   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
762     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
763     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
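
/*
   Typical call sequence (a sketch of standard PETSc usage): after all the
   MatSetValues()/MatSetValuesBlocked() calls, every process participates in

     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   which flushes the scalar and block stashes built above to their owning
   processes and then assembles the local A and B pieces.
*/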
767 
768 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
769 #include <petscdraw.h>
770 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
771 {
772   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
773   PetscErrorCode    ierr;
774   PetscInt          bs   = mat->rmap->bs;
775   PetscMPIInt       rank = baij->rank;
776   PetscBool         iascii,isdraw;
777   PetscViewer       sviewer;
778   PetscViewerFormat format;
779 
780   PetscFunctionBegin;
781   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
782   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
783   if (iascii) {
784     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
785     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
786       MatInfo info;
787       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
788       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
789       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
790       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
791       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
792       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
793       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
794       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
795       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
796       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
797       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
798       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
799       PetscFunctionReturn(0);
800     } else if (format == PETSC_VIEWER_ASCII_INFO) {
801       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
802       PetscFunctionReturn(0);
803     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
804       PetscFunctionReturn(0);
805     }
806   }
807 
808   if (isdraw) {
809     PetscDraw draw;
810     PetscBool isnull;
811     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
812     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
813     if (isnull) PetscFunctionReturn(0);
814   }
815 
816   {
817     /* assemble the entire matrix onto the first processor */
818     Mat          A;
819     Mat_SeqSBAIJ *Aloc;
820     Mat_SeqBAIJ  *Bloc;
821     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
822     MatScalar    *a;
823     const char   *matname;
824 
825     /* Should this be the same type as mat? */
826     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
827     if (!rank) {
828       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
829     } else {
830       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
831     }
832     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
833     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
834     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
835     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
836 
837     /* copy over the A part */
838     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
839     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
840     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
841 
842     for (i=0; i<mbs; i++) {
843       rvals[0] = bs*(baij->rstartbs + i);
844       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
845       for (j=ai[i]; j<ai[i+1]; j++) {
846         col = (baij->cstartbs+aj[j])*bs;
847         for (k=0; k<bs; k++) {
848           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
849           col++;
850           a += bs;
851         }
852       }
853     }
854     /* copy over the B part */
855     Bloc = (Mat_SeqBAIJ*)baij->B->data;
856     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
857     for (i=0; i<mbs; i++) {
858 
859       rvals[0] = bs*(baij->rstartbs + i);
860       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
861       for (j=ai[i]; j<ai[i+1]; j++) {
862         col = baij->garray[aj[j]]*bs;
863         for (k=0; k<bs; k++) {
864           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
865           col++;
866           a += bs;
867         }
868       }
869     }
870     ierr = PetscFree(rvals);CHKERRQ(ierr);
871     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
872     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
873     /*
874        Every process has to participate in drawing the matrix since the graphics
875        waits are synchronized across all processes that share the PetscDraw object
876     */
877     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
878     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
879     if (!rank) {
880       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
881       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
882     }
883     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
884     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
885     ierr = MatDestroy(&A);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
889 
890 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
891 {
892   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
893   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
894   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
895   PetscErrorCode ierr;
896   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
897   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
898   int            fd;
899   PetscScalar    *column_values;
900   FILE           *file;
901   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
902   PetscInt       message_count,flowcontrolcount;
903 
904   PetscFunctionBegin;
905   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
906   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
907   nz   = bs2*(A->nz + B->nz);
908   rlen = mat->rmap->n;
909   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
910   if (!rank) {
911     header[0] = MAT_FILE_CLASSID;
912     header[1] = mat->rmap->N;
913     header[2] = mat->cmap->N;
914 
915     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
916     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
917     /* get largest number of rows any processor has */
918     range = mat->rmap->range;
919     for (i=1; i<size; i++) {
920       rlen = PetscMax(rlen,range[i+1] - range[i]);
921     }
922   } else {
923     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
924   }
925 
926   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
927   /* compute the length of each block row */
928   for (i=0; i<a->mbs; i++) {
929     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
930   }
931   /* store the row lengths to the file */
932   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
933   if (!rank) {
934     MPI_Status status;
935     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
936     rlen = (range[1] - range[0])/bs;
937     for (i=0; i<rlen; i++) {
938       for (j=0; j<bs; j++) {
939         row_lens[i*bs+j] = bs*crow_lens[i];
940       }
941     }
942     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
943     for (i=1; i<size; i++) {
944       rlen = (range[i+1] - range[i])/bs;
945       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
946       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
947       for (k=0; k<rlen; k++) {
948         for (j=0; j<bs; j++) {
949           row_lens[k*bs+j] = bs*crow_lens[k];
950         }
951       }
952       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
953     }
954     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
955     ierr = PetscFree(row_lens);CHKERRQ(ierr);
956   } else {
957     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
958     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
959     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
960   }
961   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
962 
963   /* load up the local column indices. Include indices for all rows, not just one per block row, since process 0 does not have the
964      information needed to expand them for each row from a block row. This does require more communication, but still no more than
965      the communication needed for the nonzero values */
966   nzmax = nz; /* space the largest processor needs */
967   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
968   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
969   cnt   = 0;
970   for (i=0; i<a->mbs; i++) {
971     pcnt = cnt;
972     for (j=B->i[i]; j<B->i[i+1]; j++) {
973       if ((col = garray[B->j[j]]) > cstart) break;
974       for (l=0; l<bs; l++) {
975         column_indices[cnt++] = bs*col+l;
976       }
977     }
978     for (k=A->i[i]; k<A->i[i+1]; k++) {
979       for (l=0; l<bs; l++) {
980         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
981       }
982     }
983     for (; j<B->i[i+1]; j++) {
984       for (l=0; l<bs; l++) {
985         column_indices[cnt++] = bs*garray[B->j[j]]+l;
986       }
987     }
988     len = cnt - pcnt;
989     for (k=1; k<bs; k++) {
990       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
991       cnt += len;
992     }
993   }
994   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
995 
996   /* store the columns to the file */
997   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
998   if (!rank) {
999     MPI_Status status;
1000     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1001     for (i=1; i<size; i++) {
1002       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1003       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1004       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1005       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1006     }
1007     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1008   } else {
1009     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1010     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1011     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1012     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the numerical values */
1017   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<a->mbs; i++) {
1020     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1021     for (j=B->i[i]; j<B->i[i+1]; j++) {
1022       if (garray[B->j[j]] > cstart) break;
1023       for (l=0; l<bs; l++) {
1024         for (ll=0; ll<bs; ll++) {
1025           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1026         }
1027       }
1028       cnt += bs;
1029     }
1030     for (k=A->i[i]; k<A->i[i+1]; k++) {
1031       for (l=0; l<bs; l++) {
1032         for (ll=0; ll<bs; ll++) {
1033           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1034         }
1035       }
1036       cnt += bs;
1037     }
1038     for (; j<B->i[i+1]; j++) {
1039       for (l=0; l<bs; l++) {
1040         for (ll=0; ll<bs; ll++) {
1041           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1042         }
1043       }
1044       cnt += bs;
1045     }
1046     cnt += (bs-1)*rlen;
1047   }
1048   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1049 
1050   /* store the column values to the file */
1051   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1052   if (!rank) {
1053     MPI_Status status;
1054     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1055     for (i=1; i<size; i++) {
1056       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1057       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1058       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1059       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1060     }
1061     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1062   } else {
1063     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1064     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1065     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1066     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1067   }
1068   ierr = PetscFree(column_values);CHKERRQ(ierr);
1069 
1070   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1071   if (file) {
1072     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1073   }
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1078 {
1079   PetscErrorCode ierr;
1080   PetscBool      iascii,isdraw,issocket,isbinary;
1081 
1082   PetscFunctionBegin;
1083   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1084   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1085   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1086   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1087   if (iascii || isdraw || issocket) {
1088     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1089   } else if (isbinary) {
1090     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1091   }
1092   PetscFunctionReturn(0);
1093 }
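
/*
   Usage sketch (illustrative): the dispatch above means that, for example,

     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   takes the ASCII path, while viewing through a binary viewer created with
   PetscViewerBinaryOpen() ends up in MatView_MPISBAIJ_Binary().
*/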
1094 
1095 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1096 {
1097   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101 #if defined(PETSC_USE_LOG)
1102   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1103 #endif
1104   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1105   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1106   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1107   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1108 #if defined(PETSC_USE_CTABLE)
1109   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1110 #else
1111   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1112 #endif
1113   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1114   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1115   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1116   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1117   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1118   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1119   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1120   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1121   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1122   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1123   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1124   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1125   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1126   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1127   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1128 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1129   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1130 #endif
1131   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1132   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1133   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1134   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1135 
1136   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
1141 #if defined(PETSC_HAVE_ELEMENTAL)
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1143 #endif
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1150   PetscErrorCode    ierr;
1151   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1152   PetscScalar       *from;
1153   const PetscScalar *x;
1154 
1155   PetscFunctionBegin;
1156   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1157   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1158 
1159   /* diagonal part */
1160   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1161   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1162 
1163   /* subdiagonal part */
1164   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1165 
1166   /* copy x into the vec slvec0 */
1167   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1168   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1169 
1170   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1171   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1172   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1173 
1174   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1175   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1176   /* superdiagonal part */
1177   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1178   PetscFunctionReturn(0);
1179 }
1180 
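/*
   y = A*x for the symmetric matrix stored as its upper triangle:
     slvec1a = A_diag * x             (diagonal block)
     slvec0  = [x ; B^T * x]          (lower-triangle contribution to send)
     slvec1  = scatter-add of slvec0  (combine contributions from neighbors)
     y       = B * slvec1b + slvec1a  (add the superdiagonal part)
*/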
1181 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1182 {
1183   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1184   PetscErrorCode    ierr;
1185   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1186   PetscScalar       *from;
1187   const PetscScalar *x;
1188 
1189   PetscFunctionBegin;
1190   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1191   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1192 
1193   /* diagonal part */
1194   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1195   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1196 
1197   /* subdiagonal part */
1198   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1199 
1200   /* copy x into the vec slvec0 */
1201   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1202   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1203 
1204   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1205   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1206   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1207 
1208   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1209   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1210   /* superdiagonal part */
1211   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 
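/*
   Variant that uses the BAIJ-style scatter Mvctx twice (forward for the
   ghost values of x, reverse to add in the B^T contribution) instead of
   the combined sMvctx scatter used above.
*/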
1215 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1216 {
1217   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1218   PetscErrorCode ierr;
1219   PetscInt       nt;
1220 
1221   PetscFunctionBegin;
1222   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1223   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1224 
1225   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1226   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1227 
1228   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229   /* do diagonal part */
1230   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1231   /* do superdiagonal part */
1232   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1233   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1234   /* do subdiagonal part */
1235   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1236   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1237   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1242 {
1243   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1244   PetscErrorCode    ierr;
1245   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1246   PetscScalar       *from,zero=0.0;
1247   const PetscScalar *x;
1248 
1249   PetscFunctionBegin;
1250   /*
1251   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1252   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1253   */
1254   /* diagonal part */
1255   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1256   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1257 
1258   /* subdiagonal part */
1259   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1260 
1261   /* copy x into the vec slvec0 */
1262   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1263   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1264   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1265   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1266 
1267   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1269   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270 
1271   /* superdiagonal part */
1272   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1277 {
1278   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1279   PetscErrorCode ierr;
1280 
1281   PetscFunctionBegin;
1282   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283   /* do diagonal part */
1284   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1285   /* do superdiagonal part */
1286   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1287   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1288 
1289   /* do subdiagonal part */
1290   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1291   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1292   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 /*
1297   This only works correctly for square matrices where the local subblock a->A is
1298    the diagonal block
1299 */
1300 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1301 {
1302   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1303   PetscErrorCode ierr;
1304 
1305   PetscFunctionBegin;
1306   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1307   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1318   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1323 {
1324   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1325   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1326   PetscErrorCode ierr;
1327   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1328   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1329   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1330 
1331   PetscFunctionBegin;
1332   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1333   mat->getrowactive = PETSC_TRUE;
1334 
1335   if (!mat->rowvalues && (idx || v)) {
1336     /*
1337         allocate enough space to hold information from the longest row.
1338     */
1339     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1340     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1341     PetscInt     max = 1,mbs = mat->mbs,tmp;
1342     for (i=0; i<mbs; i++) {
1343       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1344       if (max < tmp) max = tmp;
1345     }
1346     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1347   }
1348 
1349   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1350   lrow = row - brstart;  /* local row index */
1351 
1352   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1353   if (!v)   {pvA = 0; pvB = 0;}
1354   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1355   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1356   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1357   nztot = nzA + nzB;
1358 
1359   cmap = mat->garray;
1360   if (v  || idx) {
1361     if (nztot) {
1362       /* Sort by increasing column numbers, assuming A and B already sorted */
1363       PetscInt imark = -1;
1364       if (v) {
1365         *v = v_p = mat->rowvalues;
1366         for (i=0; i<nzB; i++) {
1367           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1368           else break;
1369         }
1370         imark = i;
1371         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1372         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1373       }
1374       if (idx) {
1375         *idx = idx_p = mat->rowindices;
1376         if (imark > -1) {
1377           for (i=0; i<imark; i++) {
1378             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1379           }
1380         } else {
1381           for (i=0; i<nzB; i++) {
1382             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1383             else break;
1384           }
1385           imark = i;
1386         }
1387         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1388     for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1389       }
1390     } else {
1391       if (idx) *idx = 0;
1392       if (v)   *v   = 0;
1393     }
1394   }
1395   *nz  = nztot;
1396   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1397   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
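
/* Usage sketch for the row routines above (illustrative only, not part of this
   file); it assumes an assembled Mat A of type MATMPISBAIJ and a PetscErrorCode
   ierr in scope. SBAIJ formats require bracketing MatGetRow() with
   MatGetRowUpperTriangular()/MatRestoreRowUpperTriangular():

     PetscInt          row,rstart,rend,nz;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... inspect the nz entries of this row (upper triangle only) ...
       ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
*/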
1400 
1401 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1402 {
1403   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1404 
1405   PetscFunctionBegin;
1406   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1407   baij->getrowactive = PETSC_FALSE;
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1414   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1415 
1416   PetscFunctionBegin;
1417   aA->getrow_utriangular = PETSC_TRUE;
1418   PetscFunctionReturn(0);
1419 }
1420 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1421 {
1422   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1423   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1424 
1425   PetscFunctionBegin;
1426   aA->getrow_utriangular = PETSC_FALSE;
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1431 {
1432   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1437   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1442 {
1443   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1444   PetscErrorCode ierr;
1445 
1446   PetscFunctionBegin;
1447   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1448   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1453    Input: isrow       - distributed (parallel),
1454           iscol_local - locally owned (seq)
1455 */
1456 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1457 {
1458   PetscErrorCode ierr;
1459   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1460   const PetscInt *ptr1,*ptr2;
1461 
1462   PetscFunctionBegin;
1463   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1464   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1465   if (sz1 > sz2) {
1466     *flg = PETSC_FALSE;
1467     PetscFunctionReturn(0);
1468   }
1469 
1470   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1471   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1472 
1473   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1474   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1475   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1476   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1477   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1478   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1479 
1480   nmatch=0;
1481   k     = 0;
1482   for (i=0; i<sz1; i++) {
1483     for (j=k; j<sz2; j++) {
1484       if (a1[i] == a2[j]) {
1485         k = j; nmatch++;
1486         break;
1487       }
1488     }
1489   }
1490   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1491   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1492   ierr = PetscFree(a1);CHKERRQ(ierr);
1493   ierr = PetscFree(a2);CHKERRQ(ierr);
1494   if (nmatch < sz1) {
1495     *flg = PETSC_FALSE;
1496   } else {
1497     *flg = PETSC_TRUE;
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1503 {
1504   PetscErrorCode ierr;
1505   IS             iscol_local;
1506   PetscInt       csize;
1507   PetscBool      isequal;
1508 
1509   PetscFunctionBegin;
1510   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1511   if (call == MAT_REUSE_MATRIX) {
1512     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1513     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1514   } else {
1515     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1516     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1517     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1518   }
1519 
1520   /* now call MatCreateSubMatrix_MPIBAIJ_Private() */
1521   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1522   if (call == MAT_INITIAL_MATRIX) {
1523     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1524     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1525   }
1526   PetscFunctionReturn(0);
1527 }
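
/* Usage sketch (illustrative only): because of the symmetric storage the row
   and column index sets passed to MatCreateSubMatrix() must select the same
   global indices. For example, extracting the principal submatrix on this
   process's nloc locally owned rows starting at rstart (both assumed to come
   from MatGetOwnershipRange()):

     IS  is;
     Mat S;
     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),nloc,rstart,1,&is);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = ISDestroy(&is);CHKERRQ(ierr);
*/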
1528 
1529 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1530 {
1531   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1532   PetscErrorCode ierr;
1533 
1534   PetscFunctionBegin;
1535   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1536   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1541 {
1542   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1543   Mat            A  = a->A,B = a->B;
1544   PetscErrorCode ierr;
1545   PetscReal      isend[5],irecv[5];
1546 
1547   PetscFunctionBegin;
1548   info->block_size = (PetscReal)matin->rmap->bs;
1549 
1550   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1551 
1552   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1553   isend[3] = info->memory;  isend[4] = info->mallocs;
1554 
1555   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1556 
1557   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1558   isend[3] += info->memory;  isend[4] += info->mallocs;
1559   if (flag == MAT_LOCAL) {
1560     info->nz_used      = isend[0];
1561     info->nz_allocated = isend[1];
1562     info->nz_unneeded  = isend[2];
1563     info->memory       = isend[3];
1564     info->mallocs      = isend[4];
1565   } else if (flag == MAT_GLOBAL_MAX) {
1566     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1567 
1568     info->nz_used      = irecv[0];
1569     info->nz_allocated = irecv[1];
1570     info->nz_unneeded  = irecv[2];
1571     info->memory       = irecv[3];
1572     info->mallocs      = irecv[4];
1573   } else if (flag == MAT_GLOBAL_SUM) {
1574     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1575 
1576     info->nz_used      = irecv[0];
1577     info->nz_allocated = irecv[1];
1578     info->nz_unneeded  = irecv[2];
1579     info->memory       = irecv[3];
1580     info->mallocs      = irecv[4];
1581   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1582   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1583   info->fill_ratio_needed = 0;
1584   info->factor_mallocs    = 0;
1585   PetscFunctionReturn(0);
1586 }
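
/* Usage sketch (illustrative only): summing the counters over all ranks, for
   example to check how effective the preallocation was:

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/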
1587 
1588 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1589 {
1590   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1591   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1592   PetscErrorCode ierr;
1593 
1594   PetscFunctionBegin;
1595   switch (op) {
1596   case MAT_NEW_NONZERO_LOCATIONS:
1597   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1598   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1599   case MAT_KEEP_NONZERO_PATTERN:
1600   case MAT_SUBMAT_SINGLEIS:
1601   case MAT_NEW_NONZERO_LOCATION_ERR:
1602     MatCheckPreallocated(A,1);
1603     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1604     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1605     break;
1606   case MAT_ROW_ORIENTED:
1607     MatCheckPreallocated(A,1);
1608     a->roworiented = flg;
1609 
1610     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1611     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1612     break;
1613   case MAT_NEW_DIAGONALS:
1614     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1615     break;
1616   case MAT_IGNORE_OFF_PROC_ENTRIES:
1617     a->donotstash = flg;
1618     break;
1619   case MAT_USE_HASH_TABLE:
1620     a->ht_flag = flg;
1621     break;
1622   case MAT_HERMITIAN:
1623     MatCheckPreallocated(A,1);
1624     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1625     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1626 #if defined(PETSC_USE_COMPLEX)
1627     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1628 #endif
1629     break;
1630   case MAT_SPD:
1631     A->spd_set = PETSC_TRUE;
1632     A->spd     = flg;
1633     if (flg) {
1634       A->symmetric                  = PETSC_TRUE;
1635       A->structurally_symmetric     = PETSC_TRUE;
1636       A->symmetric_set              = PETSC_TRUE;
1637       A->structurally_symmetric_set = PETSC_TRUE;
1638     }
1639     break;
1640   case MAT_SYMMETRIC:
1641     MatCheckPreallocated(A,1);
1642     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1643     break;
1644   case MAT_STRUCTURALLY_SYMMETRIC:
1645     MatCheckPreallocated(A,1);
1646     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1647     break;
1648   case MAT_SYMMETRY_ETERNAL:
1649     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1650     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1651     break;
1652   case MAT_IGNORE_LOWER_TRIANGULAR:
1653     aA->ignore_ltriangular = flg;
1654     break;
1655   case MAT_ERROR_LOWER_TRIANGULAR:
1656     aA->ignore_ltriangular = flg;
1657     break;
1658   case MAT_GETROW_UPPERTRIANGULAR:
1659     aA->getrow_utriangular = flg;
1660     break;
1661   default:
1662     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1663   }
1664   PetscFunctionReturn(0);
1665 }
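
/* Usage sketch (illustrative only): options are set on the parallel matrix and
   forwarded to the sequential blocks as appropriate, e.g.

     ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

   after which insertions that fall in the lower triangle are silently dropped
   rather than treated as errors. */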
1666 
1667 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1668 {
1669   PetscErrorCode ierr;
1670 
1671   PetscFunctionBegin;
1672   if (reuse == MAT_INITIAL_MATRIX) {
1673     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1674   }  else if (reuse == MAT_REUSE_MATRIX) {
1675     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1676   }
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1681 {
1682   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1683   Mat            a     = baij->A, b=baij->B;
1684   PetscErrorCode ierr;
1685   PetscInt       nv,m,n;
1686   PetscBool      flg;
1687 
1688   PetscFunctionBegin;
1689   if (ll != rr) {
1690     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1691     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same");
1692   }
1693   if (!ll) PetscFunctionReturn(0);
1694 
1695   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1696   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %D %D must be same",m,n);
1697 
1698   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1699   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1700 
1701   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1702 
1703   /* left diagonalscale the off-diagonal part */
1704   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1705 
1706   /* scale the diagonal part */
1707   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1708 
1709   /* right diagonalscale the off-diagonal part */
1710   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1711   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
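
/* Usage sketch (illustrative only): to preserve symmetry only the congruent
   scaling D*A*D is supported, so the same vector must be passed for both the
   left and right scaling:

     Vec d;
     ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
     ierr = VecSet(d,2.0);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,d,d);CHKERRQ(ierr);
     ierr = VecDestroy(&d);CHKERRQ(ierr);
*/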
1714 
1715 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1716 {
1717   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1718   PetscErrorCode ierr;
1719 
1720   PetscFunctionBegin;
1721   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1726 
1727 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1728 {
1729   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1730   Mat            a,b,c,d;
1731   PetscBool      flg;
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   a = matA->A; b = matA->B;
1736   c = matB->A; d = matB->B;
1737 
1738   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1739   if (flg) {
1740     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1741   }
1742   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1747 {
1748   PetscErrorCode ierr;
1749   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1750   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1751 
1752   PetscFunctionBegin;
1753   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1754   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1755     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1756     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1757     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1758   } else {
1759     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1760     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1761   }
1762   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1767 {
1768   PetscErrorCode ierr;
1769 
1770   PetscFunctionBegin;
1771   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1776 {
1777   PetscErrorCode ierr;
1778   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1779   PetscBLASInt   bnz,one=1;
1780   Mat_SeqSBAIJ   *xa,*ya;
1781   Mat_SeqBAIJ    *xb,*yb;
1782 
1783   PetscFunctionBegin;
1784   if (str == SAME_NONZERO_PATTERN) {
1785     PetscScalar alpha = a;
1786     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1787     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1788     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1789     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1790     xb   = (Mat_SeqBAIJ*)xx->B->data;
1791     yb   = (Mat_SeqBAIJ*)yy->B->data;
1792     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1793     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1794     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1795   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1796     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1797     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1798     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1799   } else {
1800     Mat      B;
1801     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1802     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1803     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1804     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1805     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1806     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1807     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1808     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1809     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1810     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1811     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1812     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1813     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1814     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1815     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1816     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1817     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1818     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1819     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1820     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1821   }
1822   PetscFunctionReturn(0);
1823 }
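
/* Usage sketch (illustrative only): Y = Y + 2*X for two assembled MPISBAIJ
   matrices; with DIFFERENT_NONZERO_PATTERN the branch above builds a freshly
   preallocated result and replaces Y's header in place:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/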
1824 
1825 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1826 {
1827   PetscErrorCode ierr;
1828   PetscInt       i;
1829   PetscBool      flg;
1830 
1831   PetscFunctionBegin;
1832   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1833   for (i=0; i<n; i++) {
1834     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1835     if (!flg) {
1836       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1837     }
1838   }
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1843 {
1844   PetscErrorCode ierr;
1845   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1846   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1847 
1848   PetscFunctionBegin;
1849   if (!Y->preallocated) {
1850     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1851   } else if (!aij->nz) {
1852     PetscInt nonew = aij->nonew;
1853     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1854     aij->nonew = nonew;
1855   }
1856   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1861 {
1862   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1863   PetscErrorCode ierr;
1864 
1865   PetscFunctionBegin;
1866   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1867   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1868   if (d) {
1869     PetscInt rstart;
1870     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1871     *d += rstart/A->rmap->bs;
1872 
1873   }
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1878 {
1879   PetscFunctionBegin;
1880   *a = ((Mat_MPISBAIJ*)A->data)->A;
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 /* -------------------------------------------------------------------*/
1885 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1886                                        MatGetRow_MPISBAIJ,
1887                                        MatRestoreRow_MPISBAIJ,
1888                                        MatMult_MPISBAIJ,
1889                                /*  4*/ MatMultAdd_MPISBAIJ,
1890                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1891                                        MatMultAdd_MPISBAIJ,
1892                                        0,
1893                                        0,
1894                                        0,
1895                                /* 10*/ 0,
1896                                        0,
1897                                        0,
1898                                        MatSOR_MPISBAIJ,
1899                                        MatTranspose_MPISBAIJ,
1900                                /* 15*/ MatGetInfo_MPISBAIJ,
1901                                        MatEqual_MPISBAIJ,
1902                                        MatGetDiagonal_MPISBAIJ,
1903                                        MatDiagonalScale_MPISBAIJ,
1904                                        MatNorm_MPISBAIJ,
1905                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1906                                        MatAssemblyEnd_MPISBAIJ,
1907                                        MatSetOption_MPISBAIJ,
1908                                        MatZeroEntries_MPISBAIJ,
1909                                /* 24*/ 0,
1910                                        0,
1911                                        0,
1912                                        0,
1913                                        0,
1914                                /* 29*/ MatSetUp_MPISBAIJ,
1915                                        0,
1916                                        0,
1917                                        MatGetDiagonalBlock_MPISBAIJ,
1918                                        0,
1919                                /* 34*/ MatDuplicate_MPISBAIJ,
1920                                        0,
1921                                        0,
1922                                        0,
1923                                        0,
1924                                /* 39*/ MatAXPY_MPISBAIJ,
1925                                        MatCreateSubMatrices_MPISBAIJ,
1926                                        MatIncreaseOverlap_MPISBAIJ,
1927                                        MatGetValues_MPISBAIJ,
1928                                        MatCopy_MPISBAIJ,
1929                                /* 44*/ 0,
1930                                        MatScale_MPISBAIJ,
1931                                        MatShift_MPISBAIJ,
1932                                        0,
1933                                        0,
1934                                /* 49*/ 0,
1935                                        0,
1936                                        0,
1937                                        0,
1938                                        0,
1939                                /* 54*/ 0,
1940                                        0,
1941                                        MatSetUnfactored_MPISBAIJ,
1942                                        0,
1943                                        MatSetValuesBlocked_MPISBAIJ,
1944                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1945                                        0,
1946                                        0,
1947                                        0,
1948                                        0,
1949                                /* 64*/ 0,
1950                                        0,
1951                                        0,
1952                                        0,
1953                                        0,
1954                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1955                                        0,
1956                                        0,
1957                                        0,
1958                                        0,
1959                                /* 74*/ 0,
1960                                        0,
1961                                        0,
1962                                        0,
1963                                        0,
1964                                /* 79*/ 0,
1965                                        0,
1966                                        0,
1967                                        0,
1968                                        MatLoad_MPISBAIJ,
1969                                /* 84*/ 0,
1970                                        0,
1971                                        0,
1972                                        0,
1973                                        0,
1974                                /* 89*/ 0,
1975                                        0,
1976                                        0,
1977                                        0,
1978                                        0,
1979                                /* 94*/ 0,
1980                                        0,
1981                                        0,
1982                                        0,
1983                                        0,
1984                                /* 99*/ 0,
1985                                        0,
1986                                        0,
1987                                        0,
1988                                        0,
1989                                /*104*/ 0,
1990                                        MatRealPart_MPISBAIJ,
1991                                        MatImaginaryPart_MPISBAIJ,
1992                                        MatGetRowUpperTriangular_MPISBAIJ,
1993                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1994                                /*109*/ 0,
1995                                        0,
1996                                        0,
1997                                        0,
1998                                        MatMissingDiagonal_MPISBAIJ,
1999                                /*114*/ 0,
2000                                        0,
2001                                        0,
2002                                        0,
2003                                        0,
2004                                /*119*/ 0,
2005                                        0,
2006                                        0,
2007                                        0,
2008                                        0,
2009                                /*124*/ 0,
2010                                        0,
2011                                        0,
2012                                        0,
2013                                        0,
2014                                /*129*/ 0,
2015                                        0,
2016                                        0,
2017                                        0,
2018                                        0,
2019                                /*134*/ 0,
2020                                        0,
2021                                        0,
2022                                        0,
2023                                        0,
2024                                /*139*/ MatSetBlockSizes_Default,
2025                                        0,
2026                                        0,
2027                                        0,
2028                                        0,
2029                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2030 };
2031 
2032 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2033 {
2034   Mat_MPISBAIJ   *b;
2035   PetscErrorCode ierr;
2036   PetscInt       i,mbs,Mbs;
2037 
2038   PetscFunctionBegin;
2039   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2040   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2041   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2042   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2043 
2044   b   = (Mat_MPISBAIJ*)B->data;
2045   mbs = B->rmap->n/bs;
2046   Mbs = B->rmap->N/bs;
2047   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
2048 
2049   B->rmap->bs = bs;
2050   b->bs2      = bs*bs;
2051   b->mbs      = mbs;
2052   b->Mbs      = Mbs;
2053   b->nbs      = B->cmap->n/bs;
2054   b->Nbs      = B->cmap->N/bs;
2055 
2056   for (i=0; i<=b->size; i++) {
2057     b->rangebs[i] = B->rmap->range[i]/bs;
2058   }
2059   b->rstartbs = B->rmap->rstart/bs;
2060   b->rendbs   = B->rmap->rend/bs;
2061 
2062   b->cstartbs = B->cmap->rstart/bs;
2063   b->cendbs   = B->cmap->rend/bs;
2064 
2065 #if defined(PETSC_USE_CTABLE)
2066   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2067 #else
2068   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2069 #endif
2070   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2071   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2072   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2073   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2074   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2075   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2076   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2077   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2078   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2079 
2080   /* Because B may have been resized we simply destroy it and create a new one each time */
2081   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2082   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2083   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2084   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2085   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2086 
2087   if (!B->preallocated) {
2088     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2089     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2090     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2091     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2092     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2093   }
2094 
2095   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2096   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2097 
2098   B->preallocated  = PETSC_TRUE;
2099   B->was_assembled = PETSC_FALSE;
2100   B->assembled     = PETSC_FALSE;
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2105 {
2106   PetscInt       m,rstart,cstart,cend;
2107   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2108   const PetscInt *JJ    =0;
2109   PetscScalar    *values=0;
2110   PetscErrorCode ierr;
2111 
2112   PetscFunctionBegin;
2113   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2114   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2115   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2116   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2117   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2118   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2119   m      = B->rmap->n/bs;
2120   rstart = B->rmap->rstart/bs;
2121   cstart = B->cmap->rstart/bs;
2122   cend   = B->cmap->rend/bs;
2123 
2124   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2125   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2126   for (i=0; i<m; i++) {
2127     nz = ii[i+1] - ii[i];
2128     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2129     nz_max = PetscMax(nz_max,nz);
2130     JJ     = jj + ii[i];
2131     for (j=0; j<nz; j++) {
2132       if (*JJ >= cstart) break;
2133       JJ++;
2134     }
2135     d = 0;
2136     for (; j<nz; j++) {
2137       if (*JJ++ >= cend) break;
2138       d++;
2139     }
2140     d_nnz[i] = d;
2141     o_nnz[i] = nz - d;
2142   }
2143   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2144   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2145 
2146   values = (PetscScalar*)V;
2147   if (!values) {
2148     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2149     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2150   }
2151   for (i=0; i<m; i++) {
2152     PetscInt          row    = i + rstart;
2153     PetscInt          ncols  = ii[i+1] - ii[i];
2154     const PetscInt    *icols = jj + ii[i];
2155     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2156     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2157   }
2158 
2159   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2160   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2161   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2162   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
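
/* Usage sketch (illustrative only, with made-up data): preallocating and
   filling a matrix B of type MATMPISBAIJ from local block-CSR arrays. Here one
   local block row holds two 2x2 blocks with global block columns 0 and 3; only
   blocks on or above the block diagonal should be supplied:

     PetscInt    bs    = 2;
     PetscInt    ii[]  = {0,2};
     PetscInt    jj[]  = {0,3};
     PetscScalar vv[8] = {1,0,0,1, 5,0,0,5};
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,ii,jj,vv);CHKERRQ(ierr);
*/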
2165 
2166 /*MC
2167    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2168    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2169    the matrix is stored.
2170 
2171   For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2172   can call MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE);
2173 
2174    Options Database Keys:
2175 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2176 
2177   Level: beginner
2178 
2179 .seealso: MatCreateMPISBAIJ
2180 M*/
2181 
2182 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2183 
2184 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2185 {
2186   Mat_MPISBAIJ   *b;
2187   PetscErrorCode ierr;
2188   PetscBool      flg = PETSC_FALSE;
2189 
2190   PetscFunctionBegin;
2191   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2192   B->data = (void*)b;
2193   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2194 
2195   B->ops->destroy = MatDestroy_MPISBAIJ;
2196   B->ops->view    = MatView_MPISBAIJ;
2197   B->assembled    = PETSC_FALSE;
2198   B->insertmode   = NOT_SET_VALUES;
2199 
2200   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2201   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2202 
2203   /* build local table of row and column ownerships */
2204   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2205 
2206   /* build cache for off array entries formed */
2207   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2208 
2209   b->donotstash  = PETSC_FALSE;
2210   b->colmap      = NULL;
2211   b->garray      = NULL;
2212   b->roworiented = PETSC_TRUE;
2213 
2214   /* stuff used in block assembly */
2215   b->barray = 0;
2216 
2217   /* stuff used for matrix vector multiply */
2218   b->lvec    = 0;
2219   b->Mvctx   = 0;
2220   b->slvec0  = 0;
2221   b->slvec0b = 0;
2222   b->slvec1  = 0;
2223   b->slvec1a = 0;
2224   b->slvec1b = 0;
2225   b->sMvctx  = 0;
2226 
2227   /* stuff for MatGetRow() */
2228   b->rowindices   = 0;
2229   b->rowvalues    = 0;
2230   b->getrowactive = PETSC_FALSE;
2231 
2232   /* hash table stuff */
2233   b->ht           = 0;
2234   b->hd           = 0;
2235   b->ht_size      = 0;
2236   b->ht_flag      = PETSC_FALSE;
2237   b->ht_fact      = 0;
2238   b->ht_total_ct  = 0;
2239   b->ht_insert_ct = 0;
2240 
2241   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2242   b->ijonly = PETSC_FALSE;
2243 
2244   b->in_loc = 0;
2245   b->v_loc  = 0;
2246   b->n_loc  = 0;
2247 
2248   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2250   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2251   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2252 #if defined(PETSC_HAVE_ELEMENTAL)
2253   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2254 #endif
2255 
2256   B->symmetric                  = PETSC_TRUE;
2257   B->structurally_symmetric     = PETSC_TRUE;
2258   B->symmetric_set              = PETSC_TRUE;
2259   B->structurally_symmetric_set = PETSC_TRUE;
2260   B->symmetric_eternal          = PETSC_TRUE;
2261 
2262   B->hermitian                  = PETSC_FALSE;
2263   B->hermitian_set              = PETSC_FALSE;
2264 
2265   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2266   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2267   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2268   if (flg) {
2269     PetscReal fact = 1.39;
2270     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2271     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2272     if (fact <= 1.0) fact = 1.39;
2273     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2274     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2275   }
2276   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 /*MC
2281    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2282 
2283    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2284    and MATMPISBAIJ otherwise.
2285 
2286    Options Database Keys:
2287 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2288 
2289   Level: beginner
2290 
2291 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2292 M*/
2293 
2294 /*@C
2295    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2296    the user should preallocate the matrix storage by setting the parameters
2297    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2298    performance can be increased by more than a factor of 50.
2299 
2300    Collective on Mat
2301 
2302    Input Parameters:
2303 +  B - the matrix
2304 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2305           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2306 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2307            submatrix  (same for all local rows)
2308 .  d_nnz - array containing the number of block nonzeros in the various block rows
2309            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2310            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2311            for the diagonal entry and set a value even if it is zero.
2312 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2313            submatrix (same for all local rows).
2314 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2315            off-diagonal portion of the local submatrix that is right of the diagonal
2316            (possibly different for each block row) or NULL.
2317 
2318 
2319    Options Database Keys:
2320 +   -mat_no_unroll - uses code that does not unroll the loops in the
2321                      block calculations (much slower)
2322 -   -mat_block_size - size of the blocks to use
2323 
2324    Notes:
2325 
2326    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2327    then it must be used on all processors that share the object for that argument.
2328 
2329    If the *_nnz parameter is given then the *_nz parameter is ignored
2330 
2331    Storage Information:
2332    For a square global matrix we define each processor's diagonal portion
2333    to be its local rows and the corresponding columns (a square submatrix);
2334    each processor's off-diagonal portion encompasses the remainder of the
2335    local matrix (a rectangular submatrix).
2336 
2337    The user can specify preallocated storage for the diagonal part of
2338    the local submatrix with either d_nz or d_nnz (not both).  Set
2339    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2340    memory allocation.  Likewise, specify preallocated storage for the
2341    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2342 
2343    You can call MatGetInfo() to get information on how effective the preallocation was;
2344    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2345    You can also run with the option -info and look for messages with the string
2346    malloc in them to see if additional memory allocation was needed.
2347 
2348    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2349    the figure below we depict these three local rows and all columns (0-11).
2350 
2351 .vb
2352            0 1 2 3 4 5 6 7 8 9 10 11
2353           --------------------------
2354    row 3  |. . . d d d o o o o  o  o
2355    row 4  |. . . d d d o o o o  o  o
2356    row 5  |. . . d d d o o o o  o  o
2357           --------------------------
2358 .ve
2359 
2360    Thus, any entries in the d locations are stored in the d (diagonal)
2361    submatrix, and any entries in the o locations are stored in the
2362    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2363    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2364 
2365    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2366    plus the diagonal part of the d matrix,
2367    and o_nz should indicate the number of block nonzeros per row in the o matrix
2368 
2369    In general, for PDE problems in which most nonzeros are near the diagonal,
2370    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2371    or you will get TERRIBLE performance; see the users' manual chapter on
2372    matrices.
2373 
2374    Level: intermediate
2375 
2376 .keywords: matrix, block, aij, compressed row, sparse, parallel
2377 
2378 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2379 @*/
2380 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2381 {
2382   PetscErrorCode ierr;
2383 
2384   PetscFunctionBegin;
2385   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2386   PetscValidType(B,1);
2387   PetscValidLogicalCollectiveInt(B,bs,2);
2388   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2389   PetscFunctionReturn(0);
2390 }
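
/* Usage sketch (illustrative only): a typical create/preallocate sequence for
   2x2 blocks with at most 5 block nonzeros per block row in the diagonal
   portion and 2 in the off-diagonal portion; m and n are assumed local sizes
   divisible by the block size:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,2,5,NULL,2,NULL);CHKERRQ(ierr);
*/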
2391 
2392 /*@C
2393    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2394    (block compressed row).  For good matrix assembly performance
2395    the user should preallocate the matrix storage by setting the parameters
2396    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2397    performance can be increased by more than a factor of 50.
2398 
2399    Collective on MPI_Comm
2400 
2401    Input Parameters:
2402 +  comm - MPI communicator
2403 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2404           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2405 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2406            This value should be the same as the local size used in creating the
2407            y vector for the matrix-vector product y = Ax.
2408 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2409            This value should be the same as the local size used in creating the
2410            x vector for the matrix-vector product y = Ax.
2411 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2412 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2413 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2414            submatrix  (same for all local rows)
2415 .  d_nnz - array containing the number of block nonzeros in the various block rows
2416            in the upper triangular portion of the diagonal portion of the local submatrix
2417            (possibly different for each block row) or NULL.
2418            If you plan to factor the matrix you must leave room for the diagonal entry and
2419            set its value even if it is zero.
2420 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2421            submatrix (same for all local rows).
2422 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2423            off-diagonal portion of the local submatrix (possibly different for
2424            each block row) or NULL.
2425 
2426    Output Parameter:
2427 .  A - the matrix
2428 
2429    Options Database Keys:
2430 +   -mat_no_unroll - uses code that does not unroll the loops in the
2431                      block calculations (much slower)
2432 .   -mat_block_size - size of the blocks to use
2433 -   -mat_mpi - use the parallel matrix data structures even on one processor
2434                (defaults to using SeqBAIJ format on one processor)
2435 
2436    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2437    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2438    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2439 
2440    Notes:
2441    The number of rows and columns must be divisible by blocksize.
2442    This matrix type does not support complex Hermitian operations.
2443 
2444    The user MUST specify either the local or global matrix dimensions
2445    (possibly both).
2446 
2447    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2448    then it must be used on all processors that share the object for that argument.
2449 
2450    If the *_nnz parameter is given then the *_nz parameter is ignored
2451 
2452    Storage Information:
2453    For a square global matrix we define each processor's diagonal portion
2454    to be its local rows and the corresponding columns (a square submatrix);
2455    each processor's off-diagonal portion encompasses the remainder of the
2456    local matrix (a rectangular submatrix).
2457 
2458    The user can specify preallocated storage for the diagonal part of
2459    the local submatrix with either d_nz or d_nnz (not both).  Set
2460    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2461    memory allocation.  Likewise, specify preallocated storage for the
2462    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2463 
2464    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2465    the figure below we depict these three local rows and all columns (0-11).
2466 
2467 .vb
2468            0 1 2 3 4 5 6 7 8 9 10 11
2469           --------------------------
2470    row 3  |. . . d d d o o o o  o  o
2471    row 4  |. . . d d d o o o o  o  o
2472    row 5  |. . . d d d o o o o  o  o
2473           --------------------------
2474 .ve
2475 
2476    Thus, any entries in the d locations are stored in the d (diagonal)
2477    submatrix, and any entries in the o locations are stored in the
2478    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2479    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2480 
2481    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2482    plus the diagonal part of the d matrix,
2483    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2484    In general, for PDE problems in which most nonzeros are near the diagonal,
2485    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2486    or you will get TERRIBLE performance; see the users' manual chapter on
2487    matrices.
2488 
2489    Level: intermediate
2490 
2491 .keywords: matrix, block, aij, compressed row, sparse, parallel
2492 
2493 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2494 @*/
2495 
2496 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2497 {
2498   PetscErrorCode ierr;
2499   PetscMPIInt    size;
2500 
2501   PetscFunctionBegin;
2502   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2503   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2504   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2505   if (size > 1) {
2506     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2507     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2508   } else {
2509     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2510     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2511   }
2512   PetscFunctionReturn(0);
2513 }
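
/* Usage sketch (illustrative only): the convenience constructor above bundles
   the MatCreate()/MatSetType()/preallocation sequence, e.g. for a global M x M
   matrix (M assumed divisible by the block size 2):

     Mat A;
     ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                           5,NULL,2,NULL,&A);CHKERRQ(ierr);
*/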
2514 
2515 
2516 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2517 {
2518   Mat            mat;
2519   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2520   PetscErrorCode ierr;
2521   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2522   PetscScalar    *array;
2523 
2524   PetscFunctionBegin;
2525   *newmat = 0;
2526 
2527   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2528   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2529   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2530   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2531   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2532   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2533 
2534   mat->factortype   = matin->factortype;
2535   mat->preallocated = PETSC_TRUE;
2536   mat->assembled    = PETSC_TRUE;
2537   mat->insertmode   = NOT_SET_VALUES;
2538 
2539   a      = (Mat_MPISBAIJ*)mat->data;
2540   a->bs2 = oldmat->bs2;
2541   a->mbs = oldmat->mbs;
2542   a->nbs = oldmat->nbs;
2543   a->Mbs = oldmat->Mbs;
2544   a->Nbs = oldmat->Nbs;
2545 
2546 
2547   a->size         = oldmat->size;
2548   a->rank         = oldmat->rank;
2549   a->donotstash   = oldmat->donotstash;
2550   a->roworiented  = oldmat->roworiented;
2551   a->rowindices   = 0;
2552   a->rowvalues    = 0;
2553   a->getrowactive = PETSC_FALSE;
2554   a->barray       = 0;
2555   a->rstartbs     = oldmat->rstartbs;
2556   a->rendbs       = oldmat->rendbs;
2557   a->cstartbs     = oldmat->cstartbs;
2558   a->cendbs       = oldmat->cendbs;
2559 
2560   /* hash table stuff */
2561   a->ht           = 0;
2562   a->hd           = 0;
2563   a->ht_size      = 0;
2564   a->ht_flag      = oldmat->ht_flag;
2565   a->ht_fact      = oldmat->ht_fact;
2566   a->ht_total_ct  = 0;
2567   a->ht_insert_ct = 0;
2568 
2569   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2570   if (oldmat->colmap) {
2571 #if defined(PETSC_USE_CTABLE)
2572     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2573 #else
2574     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2575     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2576     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2577 #endif
2578   } else a->colmap = 0;
2579 
2580   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2581     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2582     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2583     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2584   } else a->garray = 0;
2585 
2586   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2587   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2588   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2589   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2590   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2591 
2592   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2593   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2594   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2595   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2596 
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet, so share the old scatter through a reference instead: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1];
  N    = header[2];

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
  if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);

  if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
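  /* For example, M = 10 with bs = 4 gives Mbs = 2 and extra_rows = 4 - 10 + 8 = 2,
     so Mbs is bumped to 3 and two padding rows (unit diagonal) are appended;
     when bs divides M, extra_rows computes to bs and is reset to 0 below */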
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User Set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
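  /* e.g., Mbs = 10 on size = 4 processes yields mbs = 3,3,2,2 on ranks 0..3 */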
  ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j<rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in the column indices of every other process (except the last) and ship them off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the column indices for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) nz += locrowlens[i];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
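  /* Each block column of a block row is marked once in mask[] and classified as
     diagonal (block index in [rstart,rend)) or off-diagonal; afterwards only the
     entries actually set are cleared, keeping the pass O(nnz) instead of O(mbs*Mbs) */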
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in the values for the other processes (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Not Collective on Mat, each process can have a different hash factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

  Notes:
   This can also be set with the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/

PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
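  /* B holds only blocks to the right of the diagonal block, so by symmetry each
     stored entry also contributes |a_ij| to the row max of a row owned by a later
     rank; those contributions are accumulated in work[] and each slice is then
     sent downstream to its owning process */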

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;           /* local col index */
          col += rowners_bs[rank+1];    /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;         /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to their owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1] - rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1] - rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
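      /* One global symmetric sweep: form bb1 = bb - (B + B^T)*xx with a single
         forward scatter (slvec0 carries -xx and -B^T*xx, which the scatter
         accumulates into slvec1), then apply a local symmetric sweep with the
         diagonal block A */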

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for the same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain
         the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i array holds the offsets into the local j array
     at which each local row starts.
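
       For example, a minimal sketch run on one process with bs = 1 (the numerical
     values are made up for illustration): the upper triangle, including the diagonal,
     of the symmetric matrix [2 -1; -1 2] is passed as
.vb
     PetscInt    i[] = {0,2,3};
     PetscInt    j[] = {0,1,1};
     PetscScalar a[] = {2.0,-1.0,2.0};
     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,2,2,i,j,a,&mat);CHKERRQ(ierr);
.ve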

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ
   (block compressed row) format.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer
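
   Example:
   An illustrative sketch; B, bs, the sizes, and the CSR arrays i, j, v are assumed to be
   supplied by the caller. This mirrors what MatCreateMPISBAIJWithArrays() does internally:
.vb
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);CHKERRQ(ierr);
.ve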

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
    PetscInt       *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;
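    /* e.g., local mbs of 3,2,4 on ranks 0,1,2 scans to 3,5,9, so rstart becomes 0,3,5 */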

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}