xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 2b8d69ca7ea5fe9190df62c1dce3bbd66fce84dd)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
/* MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol):
   insert or add a single scalar entry at local scalar indices (row,col) into
   the local diagonal block A (SeqSBAIJ); (orow,ocol) are the global indices,
   used only in error messages.  This macro uses many locals declared in the
   enclosing function: a, A, aa, ai, aj, aimax, ailen, bs, bs2, rp, ap, bap,
   brow, bcol, ridx, cidx, low, high, t, _i, ii, N, nrow, rmax, ierr. */
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    /* binary search for the block column; stop once the window is small */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    /* linear scan of the remaining window */ \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already allocated: update the (ridx,cidx) scalar inside it */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    /* block not present: honor the nonew policy before inserting */ \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    /* zero the freshly opened block slot before writing the single scalar */ \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }
75 
/* MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol):
   insert or add a single scalar entry at local scalar indices (row,col) into
   the local off-diagonal block B (SeqBAIJ); (orow,ocol) are the global
   indices, used only in error messages.  Mirror of the A-block macro above;
   uses enclosing-function locals: b, B, ba, bi, bj, bimax, bilen, bs, bs2,
   rp, ap, bap, brow, bcol, ridx, cidx, low, high, t, _i, ii, N, nrow, rmax,
   ierr. */
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    /* binary search for the block column; stop once the window is small */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    /* linear scan of the remaining window */ \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already allocated: update the (ridx,cidx) scalar inside it */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    /* block not present: honor the nonew policy before inserting */ \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    /* zero the freshly opened block slot before writing the single scalar */ \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
114 
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
/*
   Scalar-indexed MatSetValues() for MPISBAIJ.  Entries in rows owned by this
   process are inserted directly into the local diagonal block A (SeqSBAIJ)
   or off-diagonal block B (SeqBAIJ) via the *_Private() macros above;
   off-process rows are copied into the stash for communication during
   assembly.  Entries whose block row exceeds their block column (lower
   triangle) are skipped, or raise an error unless a->ignore_ltriangular.
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the cached stash work arrays if this call has more columns than any before */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue; /* negative column indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* after first assembly B stores compacted local column indices:
               translate the global block column through the colmap */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* previously unseen off-diagonal block: revert B to global
                 indexing so the new block can be inserted */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        /* copy only the upper-triangular entries of this row into the stash buffers */
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
237 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
/*
   Insert or add one bs x bs block of values v at block indices (row,col) of
   the sequential SBAIJ matrix A.  (orow,ocol) are the global block indices,
   used only in error messages.  Blocks with col < row (lower triangle) are
   ignored or raise an error depending on a->ignore_ltriangular.  v is read
   row-major or column-major according to a->roworiented.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp   = aj + ai[row];      /* block-column indices of this block row */
  ap   = aa + bs2*ai[row];  /* values of this block row */
  rmax = imax[row];         /* allocated blocks in this row */
  nrow = ailen[row];        /* used blocks in this row */
  value = v;                /* NOTE(review): redundant, value already initialized to v above */
  low   = 0;
  high  = nrow;

  /* binary search for the block column; stop once the window is small */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already allocated: copy/accumulate v with the proper orientation */
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++  = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  /* block not present: honor the nonew policy before inserting */
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  /* zero the freshly opened slot before writing the new block */
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
336 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
/*
   This routine is exactly duplicated in mpibaij.c

   Insert or add one bs x bs block of values v at block indices (row,col) of
   the sequential BAIJ matrix A (used here for the off-diagonal block B).
   (orow,ocol) are the global block indices, used only in error messages.
   Unlike the SBAIJ variant above, no lower-triangular filtering is done.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];      /* block-column indices of this block row */
  ap   = aa + bs2*ai[row];  /* values of this block row */
  rmax = imax[row];         /* allocated blocks in this row */
  nrow = ailen[row];        /* used blocks in this row */
  low  = 0;
  high = nrow;
  value = v; /* NOTE(review): redundant, value already initialized to v above */
  /* binary search for the block column; stop once the window is small */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already allocated: copy/accumulate v with the proper orientation */
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  /* block not present: honor the nonew policy before inserting */
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  /* zero the freshly opened slot before writing the new block */
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
438 /*
439     This routine could be optimized by removing the need for the block copy below and passing stride information
440   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
441 */
442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
443 {
444   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
445   const MatScalar *value;
446   MatScalar       *barray     =baij->barray;
447   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
448   PetscErrorCode  ierr;
449   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
450   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
451   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
452 
453   PetscFunctionBegin;
454   if (!barray) {
455     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
456     baij->barray = barray;
457   }
458 
459   if (roworiented) {
460     stepval = (n-1)*bs;
461   } else {
462     stepval = (m-1)*bs;
463   }
464   for (i=0; i<m; i++) {
465     if (im[i] < 0) continue;
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
468 #endif
469     if (im[i] >= rstart && im[i] < rend) {
470       row = im[i] - rstart;
471       for (j=0; j<n; j++) {
472         if (im[i] > in[j]) {
473           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
474           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
475         }
476         /* If NumCol = 1 then a copy is not required */
477         if ((roworiented) && (n == 1)) {
478           barray = (MatScalar*) v + i*bs2;
479         } else if ((!roworiented) && (m == 1)) {
480           barray = (MatScalar*) v + j*bs2;
481         } else { /* Here a copy is required */
482           if (roworiented) {
483             value = v + i*(stepval+bs)*bs + j*bs;
484           } else {
485             value = v + j*(stepval+bs)*bs + i*bs;
486           }
487           for (ii=0; ii<bs; ii++,value+=stepval) {
488             for (jj=0; jj<bs; jj++) {
489               *barray++ = *value++;
490             }
491           }
492           barray -=bs2;
493         }
494 
495         if (in[j] >= cstart && in[j] < cend) {
496           col  = in[j] - cstart;
497           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
498         } else if (in[j] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
501 #endif
502         else {
503           if (mat->was_assembled) {
504             if (!baij->colmap) {
505               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
506             }
507 
508 #if defined(PETSC_USE_DEBUG)
509 #if defined(PETSC_USE_CTABLE)
510             { PetscInt data;
511               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
512               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
513             }
514 #else
515             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
516 #endif
517 #endif
518 #if defined(PETSC_USE_CTABLE)
519             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520             col  = (col - 1)/bs;
521 #else
522             col = (baij->colmap[in[j]] - 1)/bs;
523 #endif
524             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
526               col  = in[j];
527             }
528           } else col = in[j];
529           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
530         }
531       }
532     } else {
533       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
534       if (!baij->donotstash) {
535         if (roworiented) {
536           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
537         } else {
538           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
539         }
540       }
541     }
542   }
543   PetscFunctionReturn(0);
544 }
545 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
/*
   Retrieves scalar-indexed values into v (row-major, v[i*n+j]).  Only rows
   owned by this process are supported; columns in the diagonal range are read
   from A, all other columns are translated through the colmap and read from
   B, returning 0.0 for positions with no stored entry.
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart; /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* diagonal-block column: read directly from A */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate global block column via colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* no stored block for this column on this process -> value is 0.0 */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatNorm_MPISBAIJ"
591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
592 {
593   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscReal      sum[2],*lnorm2;
596 
597   PetscFunctionBegin;
598   if (baij->size == 1) {
599     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
600   } else {
601     if (type == NORM_FROBENIUS) {
602       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
603       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
605       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
606       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
607       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
608       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
609       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
610     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
611       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
612       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
613       PetscReal    *rsum,*rsum2,vabs;
614       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
615       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
616       MatScalar    *v;
617 
618       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
619       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
620       /* Amat */
621       v = amat->a; jj = amat->j;
622       for (brow=0; brow<mbs; brow++) {
623         grow = bs*(rstart + brow);
624         nz   = amat->i[brow+1] - amat->i[brow];
625         for (bcol=0; bcol<nz; bcol++) {
626           gcol = bs*(rstart + *jj); jj++;
627           for (col=0; col<bs; col++) {
628             for (row=0; row<bs; row++) {
629               vabs            = PetscAbsScalar(*v); v++;
630               rsum[gcol+col] += vabs;
631               /* non-diagonal block */
632               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
633             }
634           }
635         }
636       }
637       /* Bmat */
638       v = bmat->a; jj = bmat->j;
639       for (brow=0; brow<mbs; brow++) {
640         grow = bs*(rstart + brow);
641         nz = bmat->i[brow+1] - bmat->i[brow];
642         for (bcol=0; bcol<nz; bcol++) {
643           gcol = bs*garray[*jj]; jj++;
644           for (col=0; col<bs; col++) {
645             for (row=0; row<bs; row++) {
646               vabs            = PetscAbsScalar(*v); v++;
647               rsum[gcol+col] += vabs;
648               rsum[grow+row] += vabs;
649             }
650           }
651         }
652       }
653       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
654       *norm = 0.0;
655       for (col=0; col<mat->cmap->N; col++) {
656         if (rsum2[col] > *norm) *norm = rsum2[col];
657       }
658       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
659     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
660   }
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNCT__
665 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
666 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscInt       nstash,reallocs;
671 
672   PetscFunctionBegin;
673   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
674 
675   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
676   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
677   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
678   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
679   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
680   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
/*
   Completes assembly: drains the scalar and block stashes started in
   MatAssemblyBegin_MPISBAIJ(), inserting received values locally, then
   assembles both local blocks, synchronizes disassembly state across
   processes, and (on final first-time assembly) sets up the scatter
   contexts.  Also updates the collective nonzerostate.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    /* drain the scalar stash: receive messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    /* restore the orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is false iff any process disassembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  /* NOTE(review): A is SeqSBAIJ; reading ->nonew through a Mat_SeqBAIJ cast here
     relies on the two structs sharing that field's offset -- verify */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
785 
786 extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
787 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
788 #include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
/*
   Viewer kernel shared by the ASCII, draw and socket paths of MatView_MPISBAIJ().

   For the ASCII "info" formats only per-process summaries are printed and the
   routine returns early.  Otherwise the entire matrix is assembled onto rank 0
   (as a temporary MATMPISBAIJ whose rows all live on rank 0) and that local
   piece is viewed sequentially through a sub-viewer.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero counts for the whole matrix and for the A/B blocks */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for an unfactored parallel matrix */
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals = the bs global point-row indices of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block at a time (a is column-major) */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps the compressed off-diagonal block column to its global block column */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          /* NOTE(review): B entries go through MatSetValues_MPIBAIJ while the A
             entries above use MatSetValues_MPISBAIJ -- confirm this asymmetry is
             intentional (A here is MATMPISBAIJ) */
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns all rows of A, so its local diagonal block is the whole matrix */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
909 
910 #undef __FUNCT__
911 #define __FUNCT__ "MatView_MPISBAIJ"
912 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
913 {
914   PetscErrorCode ierr;
915   PetscBool      iascii,isdraw,issocket,isbinary;
916 
917   PetscFunctionBegin;
918   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
919   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
920   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
921   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
922   if (iascii || isdraw || issocket || isbinary) {
923     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
924   }
925   PetscFunctionReturn(0);
926 }
927 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
/*
   Destructor for MATMPISBAIJ: releases the sub-matrices, scatter contexts,
   work vectors and scratch arrays, frees the data struct, and detaches the
   composed function pointers so the object can be retyped safely.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* stashes used to buffer off-process MatSetValues() insertions */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* diagonal (A) and off-diagonal (B) sequential blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* scatter context / ghost vector used by the 2-communication MatMult variants */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  /* single-scatter work vectors (slvec*) and their scatter context */
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  /* MatGetRow() scratch space (allocated lazily, freed at assembly too) */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* detach type name and composed methods so a subsequent MatSetType() starts clean */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
985 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
986 {
987   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
988   PetscErrorCode    ierr;
989   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
990   PetscScalar       *from;
991   const PetscScalar *x;
992 
993   PetscFunctionBegin;
994   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
995   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
996 
997   /* diagonal part */
998   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
999   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1000 
1001   /* subdiagonal part */
1002   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1003 
1004   /* copy x into the vec slvec0 */
1005   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1006   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1007 
1008   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1009   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1010   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1011 
1012   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1013   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014   /* supperdiagonal part */
1015   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MatMult_MPISBAIJ"
1021 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1022 {
1023   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1024   PetscErrorCode    ierr;
1025   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1026   PetscScalar       *from;
1027   const PetscScalar *x;
1028 
1029   PetscFunctionBegin;
1030   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1031   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1032 
1033   /* diagonal part */
1034   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1035   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1036 
1037   /* subdiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1039 
1040   /* copy x into the vec slvec0 */
1041   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1042   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1043 
1044   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1045   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1046   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1047 
1048   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1049   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1050   /* supperdiagonal part */
1051   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 #undef __FUNCT__
1056 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1057 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1058 {
1059   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1060   PetscErrorCode ierr;
1061   PetscInt       nt;
1062 
1063   PetscFunctionBegin;
1064   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1065   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1066 
1067   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1068   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1069 
1070   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1071   /* do diagonal part */
1072   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1073   /* do supperdiagonal part */
1074   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1075   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1076   /* do subdiagonal part */
1077   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1078   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1079   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1085 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1086 {
1087   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1088   PetscErrorCode    ierr;
1089   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1090   PetscScalar       *from,zero=0.0;
1091   const PetscScalar *x;
1092 
1093   PetscFunctionBegin;
1094   /*
1095   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1096   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1097   */
1098   /* diagonal part */
1099   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1100   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1101 
1102   /* subdiagonal part */
1103   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1104 
1105   /* copy x into the vec slvec0 */
1106   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1107   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1108   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1109   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1110 
1111   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114 
1115   /* supperdiagonal part */
1116   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
/*
   zz = yy + A*xx using two communications (forward scatter for the
   superdiagonal product, reverse scatter for the transposed subdiagonal
   contribution); the diagonal multiply is overlapped with the first scatter.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part (overlapped with the scatter above) */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part: B^T*xx accumulated into zz via the reverse scatter */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1141 
1142 /*
1143   This only works correctly for square matrices where the subblock A->A is the
1144    diagonal block
1145 */
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1148 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1149 {
1150   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1151   PetscErrorCode ierr;
1152 
1153   PetscFunctionBegin;
1154   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1155   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNCT__
1160 #define __FUNCT__ "MatScale_MPISBAIJ"
1161 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1162 {
1163   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1164   PetscErrorCode ierr;
1165 
1166   PetscFunctionBegin;
1167   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1168   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
/*
   Return one locally-owned global row: the rows of the local diagonal (A)
   and off-diagonal (B) blocks are merged so the output column indices are
   globally numbered and increasing (assuming A and B rows are sorted).
   Scratch space sized by the longest local row is allocated lazily on the
   first call that requests indices or values.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* only request from the sub-blocks what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* garray maps compressed off-diagonal block columns to global block columns */
  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global block column precedes the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already located by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1255 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1256 {
1257   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1258 
1259   PetscFunctionBegin;
1260   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1261   baij->getrowactive = PETSC_FALSE;
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1267 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1268 {
1269   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1270   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1271 
1272   PetscFunctionBegin;
1273   aA->getrow_utriangular = PETSC_TRUE;
1274   PetscFunctionReturn(0);
1275 }
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1278 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1279 {
1280   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1281   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1282 
1283   PetscFunctionBegin;
1284   aA->getrow_utriangular = PETSC_FALSE;
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1290 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1291 {
1292   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1293   PetscErrorCode ierr;
1294 
1295   PetscFunctionBegin;
1296   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1297   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1303 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1304 {
1305   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1306   PetscErrorCode ierr;
1307 
1308   PetscFunctionBegin;
1309   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1310   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
1315    Input: isrow       - distributed(parallel),
1316           iscol_local - locally owned (seq)
1317 */
1318 #undef __FUNCT__
1319 #define __FUNCT__ "ISEqual_private"
1320 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1321 {
1322   PetscErrorCode ierr;
1323   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1324   const PetscInt *ptr1,*ptr2;
1325 
1326   PetscFunctionBegin;
1327   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1328   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1329   if (sz1 > sz2) {
1330     *flg = PETSC_FALSE;
1331     PetscFunctionReturn(0);
1332   }
1333 
1334   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1335   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1336 
1337   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1338   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1339   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1340   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1341   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1342   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1343 
1344   nmatch=0;
1345   k     = 0;
1346   for (i=0; i<sz1; i++){
1347     for (j=k; j<sz2; j++){
1348       if (a1[i] == a2[j]) {
1349         k = j; nmatch++;
1350         break;
1351       }
1352     }
1353   }
1354   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1355   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1356   ierr = PetscFree(a1);CHKERRQ(ierr);
1357   ierr = PetscFree(a2);CHKERRQ(ierr);
1358   if (nmatch < sz1) {
1359     *flg = PETSC_FALSE;
1360   } else {
1361     *flg = PETSC_TRUE;
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
/*
   Extract a parallel submatrix.  For the symmetric format the row and column
   index sets must select the same global indices; the gathered column IS is
   cached on the new matrix (composed as "ISAllGather") so a later
   MAT_REUSE_MATRIX call can retrieve it instead of re-gathering.
*/
PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* borrowed reference from the composed object; not destroyed here */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    /* symmetric storage: rows selected must be a subset of the gathered columns */
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatGetSubMatrix_MPIBAIJ() */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* composing bumps the reference count, so the local destroy keeps it alive on newmat */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1394 
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1397 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1398 {
1399   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1404   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1410 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1411 {
1412   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1413   Mat            A  = a->A,B = a->B;
1414   PetscErrorCode ierr;
1415   PetscReal      isend[5],irecv[5];
1416 
1417   PetscFunctionBegin;
1418   info->block_size = (PetscReal)matin->rmap->bs;
1419 
1420   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1421 
1422   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1423   isend[3] = info->memory;  isend[4] = info->mallocs;
1424 
1425   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1426 
1427   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1428   isend[3] += info->memory;  isend[4] += info->mallocs;
1429   if (flag == MAT_LOCAL) {
1430     info->nz_used      = isend[0];
1431     info->nz_allocated = isend[1];
1432     info->nz_unneeded  = isend[2];
1433     info->memory       = isend[3];
1434     info->mallocs      = isend[4];
1435   } else if (flag == MAT_GLOBAL_MAX) {
1436     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1437 
1438     info->nz_used      = irecv[0];
1439     info->nz_allocated = irecv[1];
1440     info->nz_unneeded  = irecv[2];
1441     info->memory       = irecv[3];
1442     info->mallocs      = irecv[4];
1443   } else if (flag == MAT_GLOBAL_SUM) {
1444     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1445 
1446     info->nz_used      = irecv[0];
1447     info->nz_allocated = irecv[1];
1448     info->nz_unneeded  = irecv[2];
1449     info->memory       = irecv[3];
1450     info->mallocs      = irecv[4];
1451   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1452   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1453   info->fill_ratio_needed = 0;
1454   info->factor_mallocs    = 0;
1455   PetscFunctionReturn(0);
1456 }
1457 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
/*
   Option handler for MATMPISBAIJ.  Most options are forwarded to the local
   diagonal (A) and/or off-diagonal (B) blocks; a few toggle flags stored
   directly on the parallel data structure or on the SeqSBAIJ diagonal block.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* nonzero-pattern policies apply to both local blocks */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, MatSetValues() silently drops rows owned by other processes */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    /* switch the matrix-vector product to the B^H (conjugate-transpose) variant */
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    /* SPD implies (structural) symmetry; record all four flags */
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    /* SBAIJ storage is symmetric by construction; refusing symmetry is unsupported */
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): this writes the same ignore_ltriangular field as
       MAT_IGNORE_LOWER_TRIANGULAR above -- confirm whether a distinct
       "error" flag was intended here */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1536 
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1539 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1540 {
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   if (MAT_INITIAL_MATRIX || *B != A) {
1545     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1546   }
1547   PetscFunctionReturn(0);
1548 }
1549 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
/*
   MatDiagonalScale_MPISBAIJ - Symmetric diagonal scaling of an MPISBAIJ matrix.

   Because only the upper triangular part is stored, symmetry must be preserved:
   the left and right scaling vectors are required to be the same vector (or
   equal in value).  The local "diagonal" block a is scaled on both sides; the
   off-diagonal block b is scaled on the left with ll and on the right with the
   scattered ghost values of rr.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;   /* local diagonal (SBAIJ) and off-diagonal (BAIJ) blocks */
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  /* symmetric storage: left and right scalings must coincide */
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  /* nothing to scale by (ll == rr == NULL after the check above) */
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start communicating the ghost values of rr needed to right-scale b;
     overlapped with the local scaling below */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1589 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1590 {
1591   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1592   PetscErrorCode ierr;
1593 
1594   PetscFunctionBegin;
1595   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1600 
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatEqual_MPISBAIJ"
1603 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1604 {
1605   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1606   Mat            a,b,c,d;
1607   PetscBool      flg;
1608   PetscErrorCode ierr;
1609 
1610   PetscFunctionBegin;
1611   a = matA->A; b = matA->B;
1612   c = matB->A; d = matB->B;
1613 
1614   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1615   if (flg) {
1616     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1617   }
1618   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatCopy_MPISBAIJ"
1624 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1625 {
1626   PetscErrorCode ierr;
1627   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1628   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1629 
1630   PetscFunctionBegin;
1631   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1632   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1633     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1634     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1635     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1636   } else {
1637     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1638     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1639   }
1640   PetscFunctionReturn(0);
1641 }
1642 
1643 #undef __FUNCT__
1644 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1645 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1646 {
1647   PetscErrorCode ierr;
1648 
1649   PetscFunctionBegin;
1650   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1651   PetscFunctionReturn(0);
1652 }
1653 
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
/*
   MatAXPY_MPISBAIJ - Computes Y = a*X + Y.

   Three cases, by decreasing efficiency:
   . SAME_NONZERO_PATTERN   - the stored value arrays line up one-to-one, so a
                              single BLAS axpy per local block suffices;
   . SUBSET_NONZERO_PATTERN - falls back to the generic row-by-row MatAXPY_Basic(),
                              with X temporarily set to return full (stored) upper
                              triangular rows from MatGetRow();
   . otherwise              - a new matrix B with the union sparsity pattern is
                              preallocated, filled, and swapped into Y via
                              MatHeaderReplace().
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: axpy the raw value arrays of the diagonal blocks ... */
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    /* ... and of the off-diagonal blocks */
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    /* values changed without going through MatSetValues(): bump the state manually */
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    /* per-block-row nonzero counts of the union pattern of X and Y */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* B becomes the new guts of Y; the old internals of Y are destroyed */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1708 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1709 {
1710   PetscErrorCode ierr;
1711   PetscInt       i;
1712   PetscBool      flg,sorted;
1713 
1714   PetscFunctionBegin;
1715   for (i = 0; i < n; i++) {
1716     ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr);
1717     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %d not sorted",i);
1718     ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr);
1719     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %d not sorted",i);
1720   }
1721   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1722   for (i=0; i<n; i++) {
1723     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1724     if (!flg) { /* *B[i] is non-symmetric, set flag */
1725       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1726     }
1727   }
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatShift_MPISBAIJ"
1733 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1734 {
1735   PetscErrorCode ierr;
1736   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1737   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1738 
1739   PetscFunctionBegin;
1740   if (!Y->preallocated) {
1741     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1742   } else if (!aij->nz) {
1743     PetscInt nonew = aij->nonew;
1744     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1745     aij->nonew = nonew;
1746   }
1747   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 #undef __FUNCT__
1752 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ"
1753 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1754 {
1755   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1756   PetscErrorCode ierr;
1757 
1758   PetscFunctionBegin;
1759   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1760   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1761   if (d) {
1762     PetscInt rstart;
1763     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1764     *d += rstart/A->rmap->bs;
1765 
1766   }
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 
/* -------------------------------------------------------------------*/
/*
   Function dispatch table for MATMPISBAIJ.  The numbered comments give the
   slot index within struct _MatOps (see include/petsc/private/matimpl.h);
   a 0 entry means the operation is not implemented for this matrix type.
   MatCreate_MPISBAIJ() copies this table into B->ops and then overrides a
   few entries (destroy, view).
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatGetSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       0,
                                       0,
                               /* 49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatGetSubMatrix_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1921 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1922 {
1923   PetscFunctionBegin;
1924   *a = ((Mat_MPISBAIJ*)A->data)->A;
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1930 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1931 {
1932   Mat_MPISBAIJ   *b;
1933   PetscErrorCode ierr;
1934   PetscInt       i,mbs,Mbs;
1935 
1936   PetscFunctionBegin;
1937   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1938   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1939   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1940   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1941 
1942   b   = (Mat_MPISBAIJ*)B->data;
1943   mbs = B->rmap->n/bs;
1944   Mbs = B->rmap->N/bs;
1945   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1946 
1947   B->rmap->bs = bs;
1948   b->bs2      = bs*bs;
1949   b->mbs      = mbs;
1950   b->Mbs      = Mbs;
1951   b->nbs      = B->cmap->n/bs;
1952   b->Nbs      = B->cmap->N/bs;
1953 
1954   for (i=0; i<=b->size; i++) {
1955     b->rangebs[i] = B->rmap->range[i]/bs;
1956   }
1957   b->rstartbs = B->rmap->rstart/bs;
1958   b->rendbs   = B->rmap->rend/bs;
1959 
1960   b->cstartbs = B->cmap->rstart/bs;
1961   b->cendbs   = B->cmap->rend/bs;
1962 
1963   if (!B->preallocated) {
1964     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1965     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1966     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1967     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1968     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1969     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1970     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1971     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1972     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1973   }
1974 
1975   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1976   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1977 
1978   B->preallocated = PETSC_TRUE;
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 #undef __FUNCT__
1983 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1984 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1985 {
1986   PetscInt       m,rstart,cstart,cend;
1987   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1988   const PetscInt *JJ    =0;
1989   PetscScalar    *values=0;
1990   PetscErrorCode ierr;
1991 
1992   PetscFunctionBegin;
1993   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1994   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1995   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1996   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1997   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1998   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1999   m      = B->rmap->n/bs;
2000   rstart = B->rmap->rstart/bs;
2001   cstart = B->cmap->rstart/bs;
2002   cend   = B->cmap->rend/bs;
2003 
2004   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2005   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2006   for (i=0; i<m; i++) {
2007     nz = ii[i+1] - ii[i];
2008     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2009     nz_max = PetscMax(nz_max,nz);
2010     JJ     = jj + ii[i];
2011     for (j=0; j<nz; j++) {
2012       if (*JJ >= cstart) break;
2013       JJ++;
2014     }
2015     d = 0;
2016     for (; j<nz; j++) {
2017       if (*JJ++ >= cend) break;
2018       d++;
2019     }
2020     d_nnz[i] = d;
2021     o_nnz[i] = nz - d;
2022   }
2023   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2024   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2025 
2026   values = (PetscScalar*)V;
2027   if (!values) {
2028     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2029     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2030   }
2031   for (i=0; i<m; i++) {
2032     PetscInt          row    = i + rstart;
2033     PetscInt          ncols  = ii[i+1] - ii[i];
2034     const PetscInt    *icols = jj + ii[i];
2035     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2036     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2037   }
2038 
2039   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2040   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2041   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2042   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 /*MC
2047    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2048    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2049    the matrix is stored.
2050 
2051   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2052   can call MatSetOption(Mat, MAT_HERMITIAN);
2053 
2054    Options Database Keys:
2055 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2056 
2057   Level: beginner
2058 
2059 .seealso: MatCreateMPISBAIJ
2060 M*/
2061 
2062 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2063 
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
/*
   MatCreate_MPISBAIJ - Constructor registered for MATMPISBAIJ.

   Installs the MatOps_Values dispatch table, zero-initializes every field of
   the Mat_MPISBAIJ structure, composes the type-specific methods
   (store/retrieve values, diagonal block, preallocation, conversions), marks
   the matrix symmetric, and processes the -mat_use_hash_table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  /* a few slots are set directly rather than through the table */
  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  /* register the type-specific methods queried via PetscTryMethod()/PetscUseMethod() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif

  /* symmetric storage: the matrix is symmetric by construction */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;  /* guard against nonsensical (<=1) user-supplied factors */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2159 
2160 /*MC
2161    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2162 
2163    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2164    and MATMPISBAIJ otherwise.
2165 
2166    Options Database Keys:
2167 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2168 
2169   Level: beginner
2170 
2171 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2172 M*/
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
2176 /*@C
2177    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2178    the user should preallocate the matrix storage by setting the parameters
2179    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2180    performance can be increased by more than a factor of 50.
2181 
2182    Collective on Mat
2183 
2184    Input Parameters:
2185 +  B - the matrix
2186 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2187           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2188 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2189            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2193            for the diagonal entry and set a value even if it is zero.
2194 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2195            submatrix (same for all local rows).
2196 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2197            off-diagonal portion of the local submatrix that is right of the diagonal
2198            (possibly different for each block row) or NULL.
2199 
2200 
2201    Options Database Keys:
2202 .   -mat_no_unroll - uses code that does not unroll the loops in the
2203                      block calculations (much slower)
2204 .   -mat_block_size - size of the blocks to use
2205 
2206    Notes:
2207 
2208    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2209    than it must be used on all processors that share the object for that argument.
2210 
2211    If the *_nnz parameter is given then the *_nz parameter is ignored
2212 
2213    Storage Information:
2214    For a square global matrix we define each processor's diagonal portion
2215    to be its local rows and the corresponding columns (a square submatrix);
2216    each processor's off-diagonal portion encompasses the remainder of the
2217    local matrix (a rectangular submatrix).
2218 
2219    The user can specify preallocated storage for the diagonal part of
2220    the local submatrix with either d_nz or d_nnz (not both).  Set
2221    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2222    memory allocation.  Likewise, specify preallocated storage for the
2223    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2224 
2225    You can call MatGetInfo() to get information on how effective the preallocation was;
2226    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2227    You can also run with the option -info and look for messages with the string
2228    malloc in them to see if additional memory allocation was needed.
2229 
2230    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2231    the figure below we depict these three local rows and all columns (0-11).
2232 
2233 .vb
2234            0 1 2 3 4 5 6 7 8 9 10 11
2235           --------------------------
2236    row 3  |. . . d d d o o o o  o  o
2237    row 4  |. . . d d d o o o o  o  o
2238    row 5  |. . . d d d o o o o  o  o
2239           --------------------------
2240 .ve
2241 
2242    Thus, any entries in the d locations are stored in the d (diagonal)
2243    submatrix, and any entries in the o locations are stored in the
2244    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2245    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2246 
2247    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2248    plus the diagonal part of the d matrix,
2249    and o_nz should indicate the number of block nonzeros per row in the o matrix
2250 
2251    In general, for PDE problems in which most nonzeros are near the diagonal,
2252    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2253    or you will get TERRIBLE performance; see the users' manual chapter on
2254    matrices.
2255 
2256    Level: intermediate
2257 
2258 .keywords: matrix, block, aij, compressed row, sparse, parallel
2259 
2260 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2261 @*/
/* Public entry point: validates arguments, then dispatches to the type-specific
   implementation registered under "MatMPISBAIJSetPreallocation_C" (a no-op for
   matrix types that did not register it). */
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* bs must be the same on every rank of B's communicator */
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* PetscTryMethod() silently returns if B's type has no such method */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatCreateSBAIJ"
2276 /*@C
2277    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2278    (block compressed row).  For good matrix assembly performance
2279    the user should preallocate the matrix storage by setting the parameters
2280    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2281    performance can be increased by more than a factor of 50.
2282 
2283    Collective on MPI_Comm
2284 
2285    Input Parameters:
2286 +  comm - MPI communicator
2287 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2288           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2289 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2290            This value should be the same as the local size used in creating the
2291            y vector for the matrix-vector product y = Ax.
2292 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2293            This value should be the same as the local size used in creating the
2294            x vector for the matrix-vector product y = Ax.
2295 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2296 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2297 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2298            submatrix  (same for all local rows)
2299 .  d_nnz - array containing the number of block nonzeros in the various block rows
2300            in the upper triangular portion of the diagonal portion of the local submatrix
2301            (possibly different for each block row) or NULL.
2302            If you plan to factor the matrix you must leave room for the diagonal entry and
2303            set its value even if it is zero.
2304 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2305            submatrix (same for all local rows).
2306 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2307            off-diagonal portion of the local submatrix (possibly different for
2308            each block row) or NULL.
2309 
2310    Output Parameter:
2311 .  A - the matrix
2312 
2313    Options Database Keys:
2314 .   -mat_no_unroll - uses code that does not unroll the loops in the
2315                      block calculations (much slower)
2316 .   -mat_block_size - size of the blocks to use
2317 .   -mat_mpi - use the parallel matrix data structures even on one processor
2318                (defaults to using SeqBAIJ format on one processor)
2319 
2320    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2321    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2322    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2323 
2324    Notes:
2325    The number of rows and columns must be divisible by blocksize.
2326    This matrix type does not support complex Hermitian operation.
2327 
2328    The user MUST specify either the local or global matrix dimensions
2329    (possibly both).
2330 
2331    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2332    then it must be used on all processors that share the object for that argument.
2333 
2334    If the *_nnz parameter is given then the *_nz parameter is ignored
2335 
2336    Storage Information:
2337    For a square global matrix we define each processor's diagonal portion
2338    to be its local rows and the corresponding columns (a square submatrix);
2339    each processor's off-diagonal portion encompasses the remainder of the
2340    local matrix (a rectangular submatrix).
2341 
2342    The user can specify preallocated storage for the diagonal part of
2343    the local submatrix with either d_nz or d_nnz (not both).  Set
2344    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2345    memory allocation.  Likewise, specify preallocated storage for the
2346    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2347 
2348    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2349    the figure below we depict these three local rows and all columns (0-11).
2350 
2351 .vb
2352            0 1 2 3 4 5 6 7 8 9 10 11
2353           --------------------------
2354    row 3  |. . . d d d o o o o  o  o
2355    row 4  |. . . d d d o o o o  o  o
2356    row 5  |. . . d d d o o o o  o  o
2357           --------------------------
2358 .ve
2359 
2360    Thus, any entries in the d locations are stored in the d (diagonal)
2361    submatrix, and any entries in the o locations are stored in the
2362    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2363    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2364 
2365    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2366    plus the diagonal part of the d matrix,
2367    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2368    In general, for PDE problems in which most nonzeros are near the diagonal,
2369    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2370    or you will get TERRIBLE performance; see the users' manual chapter on
2371    matrices.
2372 
2373    Level: intermediate
2374 
2375 .keywords: matrix, block, aij, compressed row, sparse, parallel
2376 
2377 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2378 @*/
2379 
2380 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2381 {
2382   PetscErrorCode ierr;
2383   PetscMPIInt    size;
2384 
2385   PetscFunctionBegin;
2386   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2387   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2388   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2389   if (size > 1) {
2390     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2391     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2392   } else {
2393     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2394     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2395   }
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
/* Creates a deep copy of an assembled MPISBAIJ matrix: a new Mat object with the
   same layout, the Mat_MPISBAIJ bookkeeping fields copied field-by-field, the
   colmap/garray index maps duplicated, fresh work vectors (with slvec1a/slvec1b
   and slvec0b aliasing slices of slvec1/slvec0), and the diagonal (A) and
   off-diagonal (B) sequential blocks duplicated per cpvalues.  The sMvctx
   scatter is shared by reference rather than copied (see comment below). */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  /* create the new Mat with identical type, sizes, ops table and layouts */
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* the copy is assembled and ready to use immediately */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size/ownership bookkeeping into the new data structure */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;


  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;   /* per-call MatGetRow() scratch is not copied */
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration flags but rebuild the table lazily */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  /* duplicate the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* duplicate garray (compacted off-diagonal column -> global block column);
     its length is the number of block columns of the B part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  /* fresh stash and duplicated communication vectors/scatters */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are sequential views into the first bs*mbs
     entries and the trailing entries of slvec1 (slvec0) respectively; they share
     storage with the parent vectors rather than owning their own arrays */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* deep-copy the sequential diagonal and off-diagonal blocks (values per cpvalues) */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2510 
2511 #undef __FUNCT__
2512 #define __FUNCT__ "MatLoad_MPISBAIJ"
/* Loads an MPISBAIJ matrix from a PETSc binary viewer.  Rank 0 reads the header,
   row lengths, column indices and values from the file and ships each rank its
   portion with MPI_Send/MPI_Scatterv; every rank then computes its per-block-row
   diagonal/off-diagonal nonzero counts, preallocates, and inserts its values.
   If the global size is not divisible by the block size, extra padded rows/cols
   with a unit diagonal are appended so that it is. */
PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1; /* no block size set on the matrix or the command line: default to 1 */

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* only rank 0 touches the file; header = {classid, M, N, nz-or-format-flag} */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1];
  N    = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
  if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);

  if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs); /* rows of padding needed; bs means none */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank); /* spread remainder over the first Mbs%size ranks */
    m   = mbs*bs;
  } else { /* User Set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  /* rowners[] = block-row ownership ranges; browners[] = the same in scalar rows */
  ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; /* prefix sum -> ownership offsets */
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];   /* first owned block row */
  rend   = rowners[rank+1]; /* one past last owned block row */

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; /* padded rows carry one (diagonal) entry */
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* on one rank the padded entries are local: read only the real ones from disk */
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; /* diagonal entries of the padded rows */
    }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) nz += locrowlens[i];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  /* mask[] marks block columns already counted for the current block row;
     masked1/masked2 remember which entries to clear again afterwards */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) { /* each scalar row within the block row */
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* rstart/rend are block-row bounds; valid for columns since the matrix is square */
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  /* files may store the full matrix; drop strictly lower-triangular entries on insert */
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values  */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; /* unit diagonal on padded rows */
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      /* i == size-1 here (left over from the loop above, even when it did not iterate) */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2757 
2758 #undef __FUNCT__
2759 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2760 /*XXXXX@
2761    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2762 
2763    Input Parameters:
2764 .  mat  - the matrix
2765 .  fact - factor
2766 
2767    Not Collective on Mat, each process can have a different hash factor
2768 
2769    Level: advanced
2770 
2771   Notes:
2772    This can also be set by the command line option: -mat_use_hash_table fact
2773 
2774 .keywords: matrix, hashtable, factor, HT
2775 
2776 .seealso: MatSetOption()
2777 @XXXXX*/
2778 
2779 
2780 #undef __FUNCT__
2781 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
/* Computes v[i] = max_j |A(i,j)| for each local row of a symmetric (SBAIJ)
   parallel matrix.  The diagonal block is handled by the sequential kernel;
   each entry of the off-diagonal block B then contributes both to its own row
   and, by symmetry, to the row owned by a higher rank.  Those transposed
   contributions are accumulated in work[] and forwarded with point-to-point
   sends to the owning ranks.  Index output (idx) is not supported. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* row max over the diagonal block; refined below with B's contributions */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a; /* B's block values; walked sequentially below */
  bi  = b->i; /* B's block-row pointers */
  bj  = b->j; /* B's block-column indices */

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  /* only the strictly upper part is stored, so the last rank's B is empty
     and it has nothing to compute or send */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        /* blocks are stored column-major: kcol outer, krow inner matches ba's layout */
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;                /* local col index */
          /* NOTE(review): global index obtained by offsetting past this rank's
             range end — presumably valid because stored entries lie to the
             right of the diagonal block; confirm against garray semantics */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp; /* symmetric contribution to row 'col' */
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  /* one message per lower rank; order does not matter since we take a max */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2864 
2865 #undef __FUNCT__
2866 #define __FUNCT__ "MatSOR_MPISBAIJ"
2867 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2868 {
2869   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2870   PetscErrorCode    ierr;
2871   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2872   PetscScalar       *x,*ptr,*from;
2873   Vec               bb1;
2874   const PetscScalar *b;
2875 
2876   PetscFunctionBegin;
2877   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2878   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2879 
2880   if (flag == SOR_APPLY_UPPER) {
2881     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2882     PetscFunctionReturn(0);
2883   }
2884 
2885   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2886     if (flag & SOR_ZERO_INITIAL_GUESS) {
2887       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2888       its--;
2889     }
2890 
2891     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2892     while (its--) {
2893 
2894       /* lower triangular part: slvec0b = - B^T*xx */
2895       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2896 
2897       /* copy xx into slvec0a */
2898       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2899       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2900       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2901       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2902 
2903       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2904 
2905       /* copy bb into slvec1a */
2906       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2907       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2908       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2909       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2910 
2911       /* set slvec1b = 0 */
2912       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2913 
2914       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2915       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2916       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2917       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2918 
2919       /* upper triangular part: bb1 = bb1 - B*x */
2920       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2921 
2922       /* local diagonal sweep */
2923       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2924     }
2925     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2926   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2927     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2928   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2929     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2930   } else if (flag & SOR_EISENSTAT) {
2931     Vec               xx1;
2932     PetscBool         hasop;
2933     const PetscScalar *diag;
2934     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2935     PetscInt          i,n;
2936 
2937     if (!mat->xx1) {
2938       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2939       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2940     }
2941     xx1 = mat->xx1;
2942     bb1 = mat->bb1;
2943 
2944     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2945 
2946     if (!mat->diag) {
2947       /* this is wrong for same matrix with new nonzero values */
2948       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2949       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2950     }
2951     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2952 
2953     if (hasop) {
2954       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2955       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2956     } else {
2957       /*
2958           These two lines are replaced by code that may be a bit faster for a good compiler
2959       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2960       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2961       */
2962       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2963       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2964       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2965       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2966       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2967       if (omega == 1.0) {
2968         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2969         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2970       } else {
2971         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2972         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2973       }
2974       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2975       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2976       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2977       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2978     }
2979 
2980     /* multiply off-diagonal portion of matrix */
2981     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2982     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2983     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2984     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2985     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2986     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2987     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2988     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2989     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2990     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2991 
2992     /* local sweep */
2993     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2994     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2995   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2996   PetscFunctionReturn(0);
2997 }
2998 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
/*
   MatSOR_MPISBAIJ_2comm - SSOR relaxation for an MPISBAIJ matrix using the
   two-communication scheme: the off-diagonal contribution B^T*xx is sent with
   a reverse scatter while B*xx uses a forward scatter of xx into mat->lvec.

   Only SOR_LOCAL_SYMMETRIC_SWEEP is supported, and only for block size 1.

   Input Parameters:
     matin  - the MPISBAIJ matrix
     bb     - right-hand side
     omega  - relaxation factor
     flag   - MatSORType; must include SOR_LOCAL_SYMMETRIC_SWEEP
     fshift - diagonal shift
     its    - number of global sweeps (must be > 0)
     lits   - number of local sweeps per global sweep (must be > 0)

   Output Parameter:
     xx     - the solution iterate (updated in place)
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* With a zero initial guess the off-diagonal terms vanish, so the first
         sweep is purely local on the diagonal block A */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    /* work vectors: lvec1 receives B^T*xx, bb1 accumulates the modified RHS */
    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* start forward scatter of xx into mat->lvec; the local B^T*xx product
         below overlaps with this communication */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse scatter adds the -B^T*xx contributions into the owning ranks' bb1 */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
3044 
3045 #undef __FUNCT__
3046 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
3047 /*@
3048      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3049          CSR format the local rows.
3050 
3051    Collective on MPI_Comm
3052 
3053    Input Parameters:
3054 +  comm - MPI communicator
3055 .  bs - the block size, only a block size of 1 is supported
3056 .  m - number of local rows (Cannot be PETSC_DECIDE)
3057 .  n - This value should be the same as the local size used in creating the
3058        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3059        calculated if N is given) For square matrices n is almost always m.
3060 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3061 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3062 .   i - row indices
3063 .   j - column indices
3064 -   a - matrix values
3065 
3066    Output Parameter:
3067 .   mat - the matrix
3068 
3069    Level: intermediate
3070 
3071    Notes:
3072        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3073      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3074      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3075 
3076        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3077 
3078 .keywords: matrix, aij, compressed row, sparse, parallel
3079 
3080 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3081           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3082 @*/
3083 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3084 {
3085   PetscErrorCode ierr;
3086 
3087 
3088   PetscFunctionBegin;
3089   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3090   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3091   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3092   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3093   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3094   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 
3099 #undef __FUNCT__
3100 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
3101 /*@C
3102    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3103    (the default parallel PETSc format).
3104 
3105    Collective on MPI_Comm
3106 
3107    Input Parameters:
3108 +  B - the matrix
3109 .  bs - the block size
3110 .  i - the indices into j for the start of each local row (starts with zero)
3111 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3112 -  v - optional values in the matrix
3113 
3114    Level: developer
3115 
3116 .keywords: matrix, aij, compressed row, sparse, parallel
3117 
3118 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3119 @*/
3120 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3121 {
3122   PetscErrorCode ierr;
3123 
3124   PetscFunctionBegin;
3125   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3126   PetscFunctionReturn(0);
3127 }
3128 
3129 #undef __FUNCT__
3130 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
3131 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3132 {
3133   PetscErrorCode ierr;
3134   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3135   PetscInt       *indx;
3136   PetscScalar    *values;
3137 
3138   PetscFunctionBegin;
3139   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3140   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3141     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3142     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3143     PetscInt       *bindx,rmax=a->rmax,j;
3144 
3145     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3146     mbs = m/bs; Nbs = N/cbs;
3147     if (n == PETSC_DECIDE) {
3148       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3149     }
3150     /* Check sum(n) = Nbs */
3151     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3152     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3153 
3154     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3155     rstart -= mbs;
3156 
3157     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3158     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3159     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3160     for (i=0; i<mbs; i++) {
3161       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3162       nnz = nnz/bs;
3163       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3164       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3165       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3166     }
3167     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3168     ierr = PetscFree(bindx);CHKERRQ(ierr);
3169 
3170     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3171     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3172     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3173     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3174     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3175     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3176   }
3177 
3178   /* numeric phase */
3179   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3180   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3181 
3182   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3183   for (i=0; i<m; i++) {
3184     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3185     Ii   = i + rstart;
3186     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3187     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3188   }
3189   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3190   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3191   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3192   PetscFunctionReturn(0);
3193 }
3194