xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 101da5f51f3cf6340ec2fbcb4e4bcb2c811922e6)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
/* MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol):
   insert (INSERT_VALUES) or add (ADD_VALUES) the scalar `value` at local
   point index (row,col) of the diagonal block A of an MPISBAIJ matrix.
   Expands in-place inside MatSetValues_MPISBAIJ(), using the locals declared
   there (rp, ap, brow, bcol, low, high, _i, N, ii, bs, bs2, a, aa, ai, aj,
   aimax, ailen, ierr, ...).  orow/ocol are the original global indices and
   appear only in the error message. */
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    /* binary search narrows [low,high) to <= 3 entries, then scan linearly */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already exists: address of entry (ridx,cidx) within it */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    /* block (brow,bcol) not present: honor the nonew policy, else insert */ \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    /* zero the freshly opened block before writing the single entry */ \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }
75 
/* MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol):
   same as the A variant, but targets the off-diagonal block B of an
   MPISBAIJ matrix (uses the b, ba, bi, bj, bimax, bilen locals declared in
   MatSetValues_MPISBAIJ()).  orow/ocol are the original global indices and
   appear only in the error message. */
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    /* binary search narrows [low,high) to <= 3 entries, then scan linearly */ \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block already exists: address of entry (ridx,cidx) within it */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    /* block (brow,bcol) not present: honor the nonew policy, else insert */ \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    /* zero the freshly opened block before writing the single entry */ \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
114 
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
/* Point-wise MatSetValues for MPISBAIJ: m global rows im[], n global columns
   in[], values v (row- or column-oriented per baij->roworiented), insert/add
   mode addv.  Locally owned entries go into the diagonal (A) or off-diagonal
   (B) sequential block via the macros above; off-process rows are stashed
   for communication during assembly. */
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the reusable stash work arrays if this call has more columns
       than any previous one */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        /* entries whose block row exceeds the block column lie in the lower
           triangle, which SBAIJ does not store */
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* after first assembly B uses compacted column indices; map the
               global block column through colmap */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            /* col < 0 means the block column is not yet present in B; if new
               nonzeros are allowed, fall back to global indexing by
               disassembling the matrix */
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        /* filter out lower-triangular entries before stashing so the
           receiving rank only sees storable values */
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
237 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
/* Insert or add one bs x bs block of values at block position (row,col) of a
   sequential SBAIJ matrix.  Since SBAIJ stores only the upper triangle,
   col < row is either silently skipped (a->ignore_ltriangular) or an error.
   v holds bs2 scalars whose layout depends on a->roworiented; orow/ocol are
   the original global block indices, used only in the error message. */
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp   = aj + ai[row];      /* block-column indices of this block row */
  ap   = aa + bs2*ai[row];  /* values of this block row */
  rmax = imax[row];         /* allocated blocks in this row */
  nrow = ailen[row];        /* occupied blocks in this row */
  value = v;                /* (redundant: value was already initialized to v above) */
  low   = 0;
  high  = nrow;

  /* binary search narrows [low,high) to <= 7 entries, then scan linearly */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: merge the bs x bs values */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* v is row-major; stored blocks are column-major, hence the stride-bs walk */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* v is column-major, matching storage: straight copy/accumulate */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++  = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  /* block not present: honor the nonew policy, else insert a new block */
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  /* write the new block (INSERT semantics; the slot was just zeroed) */
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
336 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
/*
   This routine is exactly duplicated in mpibaij.c
*/
/* Insert or add one bs x bs block of values at block position (row,col) of a
   sequential BAIJ matrix (used here for the off-diagonal part B, which has no
   triangularity restriction).  v holds bs2 scalars whose layout depends on
   a->roworiented; orow/ocol are the original global block indices, used only
   in the error message. */
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];      /* block-column indices of this block row */
  ap   = aa + bs2*ai[row];  /* values of this block row */
  rmax = imax[row];         /* allocated blocks in this row */
  nrow = ailen[row];        /* occupied blocks in this row */
  low  = 0;
  high = nrow;
  value = v;                /* (redundant: value was already initialized to v above) */
  /* binary search narrows [low,high) to <= 7 entries, then scan linearly */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: merge the bs x bs values */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* v is row-major; stored blocks are column-major, hence the stride-bs walk */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* v is column-major, matching storage: copy/accumulate column by column */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  /* block not present: honor the nonew policy, else insert a new block */
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  /* write the new block (INSERT semantics; the slot was just zeroed) */
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
435 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
/* Block-wise MatSetValues for MPISBAIJ: m block rows im[], n block columns
   in[], v holds m*n blocks of bs2 scalars (layout per baij->roworiented).
   Locally owned blocks are routed to A or B via the inlined routines above;
   off-process block rows are stashed into mat->bstash for assembly. */
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  /* NOTE(review): cstart/cend are taken from the block ROW range (rstartbs/
     rendbs) -- presumably valid because SBAIJ matrices are square with
     identical row and column layouts; confirm rstartbs == cstartbs holds. */
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs x bs scratch block used for reordering copies */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/cols of one block in v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* only the upper triangle (by block) is stored */
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the strided bs x bs block into the contiguous scratch */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            /* after first assembly B uses compacted column indices; map the
               global block column through colmap */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
            /* colmap entries store 1-based point indices of block starts, so
               (entry-1) must be divisible by bs */
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* col < 0: block column absent from B; if new nonzeros are
               allowed, disassemble back to global indexing.
               NOTE(review): A's data is Mat_SeqSBAIJ, yet it is cast to
               Mat_SeqBAIJ here to read ->nonew -- appears to rely on the
               two structs laying out that field identically; confirm. */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
545 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
/* Retrieve entries (idxm[i],idxn[j]) into v (row-major, v[i*n+j]).  Only
   locally owned rows are supported; off-process rows raise an error.
   Diagonal-part columns come from A, others from B via the colmap. */
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart; /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* locally owned column: read from the diagonal block A */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate through colmap into B */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* absent from colmap, or a stale entry for a different block:
             the stored value is structurally zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatNorm_MPISBAIJ"
591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
592 {
593   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscReal      sum[2],*lnorm2;
596 
597   PetscFunctionBegin;
598   if (baij->size == 1) {
599     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
600   } else {
601     if (type == NORM_FROBENIUS) {
602       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
603       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
605       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
606       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
607       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
608       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
609       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
610     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
611       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
612       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
613       PetscReal    *rsum,*rsum2,vabs;
614       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
615       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
616       MatScalar    *v;
617 
618       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
619       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
620       /* Amat */
621       v = amat->a; jj = amat->j;
622       for (brow=0; brow<mbs; brow++) {
623         grow = bs*(rstart + brow);
624         nz   = amat->i[brow+1] - amat->i[brow];
625         for (bcol=0; bcol<nz; bcol++) {
626           gcol = bs*(rstart + *jj); jj++;
627           for (col=0; col<bs; col++) {
628             for (row=0; row<bs; row++) {
629               vabs            = PetscAbsScalar(*v); v++;
630               rsum[gcol+col] += vabs;
631               /* non-diagonal block */
632               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
633             }
634           }
635         }
636         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
637       }
638       /* Bmat */
639       v = bmat->a; jj = bmat->j;
640       for (brow=0; brow<mbs; brow++) {
641         grow = bs*(rstart + brow);
642         nz = bmat->i[brow+1] - bmat->i[brow];
643         for (bcol=0; bcol<nz; bcol++) {
644           gcol = bs*garray[*jj]; jj++;
645           for (col=0; col<bs; col++) {
646             for (row=0; row<bs; row++) {
647               vabs            = PetscAbsScalar(*v); v++;
648               rsum[gcol+col] += vabs;
649               rsum[grow+row] += vabs;
650             }
651           }
652         }
653         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
654       }
655       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
656       *norm = 0.0;
657       for (col=0; col<mat->cmap->N; col++) {
658         if (rsum2[col] > *norm) *norm = rsum2[col];
659       }
660       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
661     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
662   }
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
668 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode ierr;
672   PetscInt       nstash,reallocs;
673 
674   PetscFunctionBegin;
675   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
676 
677   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
678   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
679   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
680   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
681   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
682   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
/* Complete assembly: drain the scalar and block stashes of received
   off-process entries into the local parts, assemble A and B, synchronize
   disassembly state across ranks, build the scatter context on the first
   final assembly, and update the collective nonzero state. */
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    /* drain the scalar stash: each received message is a run of
       (row,col,val) triples sorted by row */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block is bs2 scalars, hence the i*bs2 value offset */
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  /* NOTE(review): A's data is Mat_SeqSBAIJ, yet it is cast to Mat_SeqBAIJ
     here to read ->nonew -- appears to rely on the two structs laying out
     that field identically; confirm. */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
787 
788 extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
789 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
790 #include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
/*
   Views an MPISBAIJ matrix through an ASCII, draw or socket viewer.

   The ASCII "info" formats print per-process nonzero statistics and return
   early.  All remaining cases assemble the entire matrix onto process 0 as a
   temporary parallel MPISBAIJ matrix A and then view A's local (SeqSBAIJ)
   part through a per-process sub-viewer.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process storage statistics: whole local part, then the on- and
         off-diagonal blocks, then the scatter used in MatMult */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* a null draw context means nothing should be drawn; bail out early */
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    /* process 0 owns all rows of the gathered matrix; the others own none */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global row indices of one block row */
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* diagonal block: column indices are local block indices, shift by cstartbs */
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one bs-long column of the bs x bs block at a time */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* off-diagonal block: garray maps compressed local block columns to global */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          /* NOTE(review): the A part above uses MatSetValues_MPISBAIJ while this
             uses MatSetValues_MPIBAIJ -- confirm the asymmetry is intentional */
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* only process 0 holds data; give its local block the original name and view it */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "MatView_MPISBAIJ"
915 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
916 {
917   PetscErrorCode ierr;
918   PetscBool      iascii,isdraw,issocket,isbinary;
919 
920   PetscFunctionBegin;
921   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
922   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
923   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
924   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
925   if (iascii || isdraw || issocket || isbinary) {
926     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
927   }
928   PetscFunctionReturn(0);
929 }
930 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
/*
   Destroys an MPISBAIJ matrix: releases both stashes, the sequential
   diagonal (A) and off-diagonal (B) blocks, the column map, all work
   vectors/scatters used by MatMult and the relaxation routines, then the
   data structure itself, and finally unregisters the composed methods.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* stashes hold off-process values buffered by MatSetValues() */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* sequential diagonal and off-diagonal blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* global-to-local column map; stored as a hash table or a plain array */
  #if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* work vectors and scatters for the matrix-vector products */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  /* workspace used by MatGetRow() */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* clear the type name and composed methods so the Mat can be retyped */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
987 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
988 {
989   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
990   PetscErrorCode    ierr;
991   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
992   PetscScalar       *from;
993   const PetscScalar *x;
994 
995   PetscFunctionBegin;
996   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
997   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
998 
999   /* diagonal part */
1000   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1001   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1002 
1003   /* subdiagonal part */
1004   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1005 
1006   /* copy x into the vec slvec0 */
1007   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1008   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1009 
1010   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1011   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1012   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1013 
1014   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016   /* supperdiagonal part */
1017   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatMult_MPISBAIJ"
1023 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1024 {
1025   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1026   PetscErrorCode    ierr;
1027   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1028   PetscScalar       *from;
1029   const PetscScalar *x;
1030 
1031   PetscFunctionBegin;
1032   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1033   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1034 
1035   /* diagonal part */
1036   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1037   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1038 
1039   /* subdiagonal part */
1040   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1041 
1042   /* copy x into the vec slvec0 */
1043   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1044   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1045 
1046   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1047   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1048   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1049 
1050   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1051   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052   /* supperdiagonal part */
1053   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1059 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1060 {
1061   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1062   PetscErrorCode ierr;
1063   PetscInt       nt;
1064 
1065   PetscFunctionBegin;
1066   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1067   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1068 
1069   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1070   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1071 
1072   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1073   /* do diagonal part */
1074   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1075   /* do supperdiagonal part */
1076   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1077   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1078   /* do subdiagonal part */
1079   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1087 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1088 {
1089   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1090   PetscErrorCode    ierr;
1091   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1092   PetscScalar       *from,zero=0.0;
1093   const PetscScalar *x;
1094 
1095   PetscFunctionBegin;
1096   /*
1097   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1098   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1099   */
1100   /* diagonal part */
1101   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1102   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1103 
1104   /* subdiagonal part */
1105   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1106 
1107   /* copy x into the vec slvec0 */
1108   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1109   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1110   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1111   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1112 
1113   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1115   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116 
1117   /* supperdiagonal part */
1118   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
/*
   MatMultAdd_MPISBAIJ_2comm - zz = yy + mat*xx using the two-communication
   variant: a forward scatter for the superdiagonal part and a reverse
   (adding) scatter for the subdiagonal part.  The scatters are deliberately
   overlapped with the local block products.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part: zz = yy + A*xx, overlapped with the scatter above */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do supperdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part: B^T*xx is computed locally then added back to the
     owning processes via a reverse scatter */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1143 
1144 /*
1145   This only works correctly for square matrices where the subblock A->A is the
1146    diagonal block
1147 */
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1150 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1151 {
1152   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1153   PetscErrorCode ierr;
1154 
1155   PetscFunctionBegin;
1156   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1157   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatScale_MPISBAIJ"
1163 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1164 {
1165   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1166   PetscErrorCode ierr;
1167 
1168   PetscFunctionBegin;
1169   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1170   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
/*
   Returns one locally-owned global row of the matrix, merging the pieces
   stored in the diagonal (A) and off-diagonal (B) blocks into a single list
   of global column indices and values sorted by increasing column.

   Workspace (rowvalues/rowindices) sized for the longest local row is lazily
   allocated on first use and freed in MatAssemblyEnd/MatDestroy.  Callers
   must pair this with MatRestoreRow().
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    /* max is in blocks; each block row contributes up to max*bs2 scalar entries */
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* only request from A/B the pieces (values and/or columns) the caller wants */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap maps B's compressed block columns back to global block columns */
  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies before the
         diagonal block; those go first, then all of A, then the rest of B */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested; determine imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        /* A's columns are local within the diagonal block; shift by cstart*bs */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  /* hand the A/B row workspace back to the sequential matrices */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1257 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1258 {
1259   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1260 
1261   PetscFunctionBegin;
1262   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1263   baij->getrowactive = PETSC_FALSE;
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1269 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1270 {
1271   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1272   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1273 
1274   PetscFunctionBegin;
1275   aA->getrow_utriangular = PETSC_TRUE;
1276   PetscFunctionReturn(0);
1277 }
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1280 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1281 {
1282   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1283   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1284 
1285   PetscFunctionBegin;
1286   aA->getrow_utriangular = PETSC_FALSE;
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1292 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1293 {
1294   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1295   PetscErrorCode ierr;
1296 
1297   PetscFunctionBegin;
1298   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1299   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1305 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1306 {
1307   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309 
1310   PetscFunctionBegin;
1311   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1312   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
/* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
   Input: isrow       - distributed(parallel),
          iscol_local - locally owned (seq)
   Output: flg - PETSC_TRUE if every index of isrow appears in iscol_local
   (despite the name, this tests subset membership, not strict equality)
*/
#undef __FUNCT__
#define __FUNCT__ "ISEqual_private"
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  /* a larger set cannot be a subset of a smaller one */
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  /* sort private copies of both index lists so a forward merge-style scan works */
  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  /* count how many entries of a1 are found in a2; k never moves backwards,
     which is valid because both arrays are sorted ascending
     (NOTE(review): k is not advanced past a match, so duplicate entries in
     a1 can re-match the same a2 slot -- presumably IS indices are distinct;
     confirm with callers) */
  nmatch=0;
  k     = 0;
  for (i=0; i<sz1; i++){
    for (j=k; j<sz2; j++){
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}
1367 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
/*
   Extracts a parallel submatrix.  For the symmetric SBAIJ format the column
   index set must equal the row index set, which is checked (via the subset
   test ISEqual_private) on the first call.  The gathered column IS is cached
   on the new matrix under the key "ISAllGather" so MAT_REUSE_MATRIX calls
   can retrieve it instead of regathering.
*/
PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* reuse requires the cached gathered IS from the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatGetSubMatrix_MPIBAIJ() */
  ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* compose takes a reference, so the local destroy leaves the IS cached */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1396 
1397 #undef __FUNCT__
1398 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1399 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1400 {
1401   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1402   PetscErrorCode ierr;
1403 
1404   PetscFunctionBegin;
1405   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1406   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1412 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1413 {
1414   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1415   Mat            A  = a->A,B = a->B;
1416   PetscErrorCode ierr;
1417   PetscReal      isend[5],irecv[5];
1418 
1419   PetscFunctionBegin;
1420   info->block_size = (PetscReal)matin->rmap->bs;
1421 
1422   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1423 
1424   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1425   isend[3] = info->memory;  isend[4] = info->mallocs;
1426 
1427   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1428 
1429   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1430   isend[3] += info->memory;  isend[4] += info->mallocs;
1431   if (flag == MAT_LOCAL) {
1432     info->nz_used      = isend[0];
1433     info->nz_allocated = isend[1];
1434     info->nz_unneeded  = isend[2];
1435     info->memory       = isend[3];
1436     info->mallocs      = isend[4];
1437   } else if (flag == MAT_GLOBAL_MAX) {
1438     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1439 
1440     info->nz_used      = irecv[0];
1441     info->nz_allocated = irecv[1];
1442     info->nz_unneeded  = irecv[2];
1443     info->memory       = irecv[3];
1444     info->mallocs      = irecv[4];
1445   } else if (flag == MAT_GLOBAL_SUM) {
1446     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1447 
1448     info->nz_used      = irecv[0];
1449     info->nz_allocated = irecv[1];
1450     info->nz_unneeded  = irecv[2];
1451     info->memory       = irecv[3];
1452     info->mallocs      = irecv[4];
1453   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1454   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1455   info->fill_ratio_needed = 0;
1456   info->factor_mallocs    = 0;
1457   PetscFunctionReturn(0);
1458 }
1459 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
/*
   Sets an option on an MPISBAIJ matrix.  Most options are forwarded to the
   sequential diagonal (a->A) and off-diagonal (a->B) blocks; a few only
   update flags in the parallel data structure or in the SeqSBAIJ block.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-pattern options: forward to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* remember the orientation locally as well; MatSetValues uses it */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    /* switch the matrix-vector product to the Hermitian variant */
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    /* SPD implies (structural) symmetry */
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    /* SBAIJ is symmetric by construction; it cannot become unsymmetric */
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1541 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1542 {
1543   PetscErrorCode ierr;
1544 
1545   PetscFunctionBegin;
1546   if (MAT_INITIAL_MATRIX || *B != A) {
1547     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1548   }
1549   PetscFunctionReturn(0);
1550 }
1551 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
/*
   MatDiagonalScale_MPISBAIJ - Computes mat = diag(ll)*mat*diag(rr).

   Because only the symmetric (SBAIJ) storage is kept, the scaling must
   preserve symmetry: ll and rr must be the same vector (or compare equal),
   and the local row and column sizes must match.  The off-diagonal block is
   scaled on the left with the owned part of ll and on the right with the
   ghost entries of rr gathered into baij->lvec; the ghost-value scatter is
   overlapped with the scaling of the diagonal block.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  /* symmetry requires identical left and right scaling */
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  /* ll == rr was established above, so !ll means no scaling requested */
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start gathering ghost entries of rr needed by the off-diagonal block */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1591 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1592 {
1593   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1594   PetscErrorCode ierr;
1595 
1596   PetscFunctionBegin;
1597   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1602 
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatEqual_MPISBAIJ"
1605 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1606 {
1607   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1608   Mat            a,b,c,d;
1609   PetscBool      flg;
1610   PetscErrorCode ierr;
1611 
1612   PetscFunctionBegin;
1613   a = matA->A; b = matA->B;
1614   c = matB->A; d = matB->B;
1615 
1616   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1617   if (flg) {
1618     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1619   }
1620   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 #undef __FUNCT__
1625 #define __FUNCT__ "MatCopy_MPISBAIJ"
1626 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1627 {
1628   PetscErrorCode ierr;
1629   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1630   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1631 
1632   PetscFunctionBegin;
1633   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1634   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1635     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1636     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1637     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1638   } else {
1639     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1640     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1641   }
1642   PetscFunctionReturn(0);
1643 }
1644 
#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPISBAIJ"
/*
   MatSetUp_MPISBAIJ - Default setup: preallocate with PETSc-chosen defaults
   (PETSC_DEFAULT nonzeros per block row, no per-row counts) using the block
   size already stored on A's row layout.
*/
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1655 
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
/*
   MatAXPY_MPISBAIJ - Computes Y = a*X + Y.

   Three cases by nonzero-structure relationship:
   - SAME_NONZERO_PATTERN: the stored value arrays of X and Y line up entry
     by entry, so the update is two BLAS axpy calls, one on the diagonal
     (SeqSBAIJ) block and one on the off-diagonal (SeqBAIJ) block;
   - SUBSET_NONZERO_PATTERN (X's nonzeros are a subset of Y's): generic
     MatAXPY_Basic(), with X temporarily switched to upper-triangular
     MatGetRow() mode so its rows can be traversed;
   - otherwise: build a new matrix B preallocated for the union pattern,
     perform the AXPY into B, then swap B's innards into Y via
     MatHeaderReplace() so the caller's handle Y stays valid.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    /* values changed directly in the arrays, so bump the object state by hand */
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    /* per-block-row counts for the union pattern of X and Y */
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* Y takes over B's data; B itself is destroyed by the call */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1710 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1711 {
1712   PetscErrorCode ierr;
1713   PetscInt       i;
1714   PetscBool      flg;
1715 
1716   PetscFunctionBegin;
1717   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1718   for (i=0; i<n; i++) {
1719     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1720     if (!flg) {
1721       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1722     }
1723   }
1724   PetscFunctionReturn(0);
1725 }
1726 
#undef __FUNCT__
#define __FUNCT__ "MatShift_MPISBAIJ"
/*
   MatShift_MPISBAIJ - Computes Y = Y + a*I.

   Before delegating to the generic MatShift_Basic(), make sure the diagonal
   entries can be inserted: if Y was never preallocated, preallocate one
   block per block row (enough for the diagonal); if it is preallocated but
   its local diagonal part holds no nonzeros yet, re-preallocate just that
   part, preserving the user's new-nonzero policy (nonew) across the call.
*/
PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
  Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* re-preallocation resets the nonew flag; save and restore it */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1746 
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ"
1749 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1750 {
1751   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1752   PetscErrorCode ierr;
1753 
1754   PetscFunctionBegin;
1755   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1756   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1757   if (d) {
1758     PetscInt rstart;
1759     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1760     *d += rstart/A->rmap->bs;
1761 
1762   }
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1768 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1769 {
1770   PetscFunctionBegin;
1771   *a = ((Mat_MPISBAIJ*)A->data)->A;
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 /* -------------------------------------------------------------------*/
/*
   Operation table for MATMPISBAIJ.  This initializer is strictly
   positional: entry i fills slot i of struct _MatOps and is dispatched by
   the corresponding MatXXX() front end (the index comments mark every
   fifth slot).  A 0 means the operation is not implemented for this type.
   Do not reorder entries.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatGetSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       0,
                                       0,
                               /* 49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatGetSubMatrix_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1925 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1926 {
1927   Mat_MPISBAIJ   *b;
1928   PetscErrorCode ierr;
1929   PetscInt       i,mbs,Mbs;
1930 
1931   PetscFunctionBegin;
1932   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1933   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1934   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1935   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1936 
1937   b   = (Mat_MPISBAIJ*)B->data;
1938   mbs = B->rmap->n/bs;
1939   Mbs = B->rmap->N/bs;
1940   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1941 
1942   B->rmap->bs = bs;
1943   b->bs2      = bs*bs;
1944   b->mbs      = mbs;
1945   b->Mbs      = Mbs;
1946   b->nbs      = B->cmap->n/bs;
1947   b->Nbs      = B->cmap->N/bs;
1948 
1949   for (i=0; i<=b->size; i++) {
1950     b->rangebs[i] = B->rmap->range[i]/bs;
1951   }
1952   b->rstartbs = B->rmap->rstart/bs;
1953   b->rendbs   = B->rmap->rend/bs;
1954 
1955   b->cstartbs = B->cmap->rstart/bs;
1956   b->cendbs   = B->cmap->rend/bs;
1957 
1958   if (!B->preallocated) {
1959     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1960     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1961     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1962     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1963     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1964     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1965     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1966     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1967     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1968   }
1969 
1970   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1971   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1972 
1973   B->preallocated = PETSC_TRUE;
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 #undef __FUNCT__
1978 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1979 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1980 {
1981   PetscInt       m,rstart,cstart,cend;
1982   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1983   const PetscInt *JJ    =0;
1984   PetscScalar    *values=0;
1985   PetscErrorCode ierr;
1986 
1987   PetscFunctionBegin;
1988   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1989   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1990   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1991   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1992   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1993   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1994   m      = B->rmap->n/bs;
1995   rstart = B->rmap->rstart/bs;
1996   cstart = B->cmap->rstart/bs;
1997   cend   = B->cmap->rend/bs;
1998 
1999   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2000   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2001   for (i=0; i<m; i++) {
2002     nz = ii[i+1] - ii[i];
2003     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2004     nz_max = PetscMax(nz_max,nz);
2005     JJ     = jj + ii[i];
2006     for (j=0; j<nz; j++) {
2007       if (*JJ >= cstart) break;
2008       JJ++;
2009     }
2010     d = 0;
2011     for (; j<nz; j++) {
2012       if (*JJ++ >= cend) break;
2013       d++;
2014     }
2015     d_nnz[i] = d;
2016     o_nnz[i] = nz - d;
2017   }
2018   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2019   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2020 
2021   values = (PetscScalar*)V;
2022   if (!values) {
2023     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2024     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2025   }
2026   for (i=0; i<m; i++) {
2027     PetscInt          row    = i + rstart;
2028     PetscInt          ncols  = ii[i+1] - ii[i];
2029     const PetscInt    *icols = jj + ii[i];
2030     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2031     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2032   }
2033 
2034   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2035   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2036   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2037   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 /*MC
2042    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2043    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2044    the matrix is stored.
2045 
  For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
  can call MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE);
2048 
2049    Options Database Keys:
2050 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2051 
2052   Level: beginner
2053 
2054 .seealso: MatCreateMPISBAIJ
2055 M*/
2056 
2057 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2058 
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
/*
   MatCreate_MPISBAIJ - Constructor registered for type MATMPISBAIJ.

   Installs the operation table, initializes all Mat_MPISBAIJ fields to
   empty defaults (the sequential blocks, scatters and work vectors are
   created later, at preallocation/assembly time), registers the composed
   methods used for dispatch, marks the matrix symmetric, and processes the
   hash-table options.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  /* composed methods so generic front ends can dispatch to this type */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif

  /* SBAIJ storage is symmetric by construction */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    /* factors <= 1.0 would make the hash table too small; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2152 
2153 /*MC
2154    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2155 
2156    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2157    and MATMPISBAIJ otherwise.
2158 
2159    Options Database Keys:
2160 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2161 
2162   Level: beginner
2163 
2164 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2165 M*/
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
2169 /*@C
2170    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2171    the user should preallocate the matrix storage by setting the parameters
2172    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2173    performance can be increased by more than a factor of 50.
2174 
2175    Collective on Mat
2176 
2177    Input Parameters:
2178 +  B - the matrix
2179 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2180           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2181 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2182            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2186            for the diagonal entry and set a value even if it is zero.
2187 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2188            submatrix (same for all local rows).
2189 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2190            off-diagonal portion of the local submatrix that is right of the diagonal
2191            (possibly different for each block row) or NULL.
2192 
2193 
2194    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use
2198 
2199    Notes:
2200 
2201    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2202    than it must be used on all processors that share the object for that argument.
2203 
2204    If the *_nnz parameter is given then the *_nz parameter is ignored
2205 
2206    Storage Information:
2207    For a square global matrix we define each processor's diagonal portion
2208    to be its local rows and the corresponding columns (a square submatrix);
2209    each processor's off-diagonal portion encompasses the remainder of the
2210    local matrix (a rectangular submatrix).
2211 
2212    The user can specify preallocated storage for the diagonal part of
2213    the local submatrix with either d_nz or d_nnz (not both).  Set
2214    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2215    memory allocation.  Likewise, specify preallocated storage for the
2216    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2217 
2218    You can call MatGetInfo() to get information on how effective the preallocation was;
2219    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2220    You can also run with the option -info and look for messages with the string
2221    malloc in them to see if additional memory allocation was needed.
2222 
2223    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2224    the figure below we depict these three local rows and all columns (0-11).
2225 
2226 .vb
2227            0 1 2 3 4 5 6 7 8 9 10 11
2228           --------------------------
2229    row 3  |. . . d d d o o o o  o  o
2230    row 4  |. . . d d d o o o o  o  o
2231    row 5  |. . . d d d o o o o  o  o
2232           --------------------------
2233 .ve
2234 
2235    Thus, any entries in the d locations are stored in the d (diagonal)
2236    submatrix, and any entries in the o locations are stored in the
2237    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2238    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2239 
2240    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2241    plus the diagonal part of the d matrix,
2242    and o_nz should indicate the number of block nonzeros per row in the o matrix
2243 
2244    In general, for PDE problems in which most nonzeros are near the diagonal,
2245    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2246    or you will get TERRIBLE performance; see the users' manual chapter on
2247    matrices.
2248 
2249    Level: intermediate
2250 
2251 .keywords: matrix, block, aij, compressed row, sparse, parallel
2252 
2253 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2254 @*/
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation composed on B (e.g.
     MatMPISBAIJSetPreallocation_MPISBAIJ); a no-op for other matrix types */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatCreateSBAIJ"
2269 /*@C
2270    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2271    (block compressed row).  For good matrix assembly performance
2272    the user should preallocate the matrix storage by setting the parameters
2273    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2274    performance can be increased by more than a factor of 50.
2275 
2276    Collective on MPI_Comm
2277 
2278    Input Parameters:
2279 +  comm - MPI communicator
2280 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2281           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2282 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2283            This value should be the same as the local size used in creating the
2284            y vector for the matrix-vector product y = Ax.
2285 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2286            This value should be the same as the local size used in creating the
2287            x vector for the matrix-vector product y = Ax.
2288 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2289 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2290 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2291            submatrix  (same for all local rows)
2292 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.
2295            If you plan to factor the matrix you must leave room for the diagonal entry and
2296            set its value even if it is zero.
2297 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2298            submatrix (same for all local rows).
2299 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2300            off-diagonal portion of the local submatrix (possibly different for
2301            each block row) or NULL.
2302 
2303    Output Parameter:
2304 .  A - the matrix
2305 
2306    Options Database Keys:
2307 .   -mat_no_unroll - uses code that does not unroll the loops in the
2308                      block calculations (much slower)
2309 .   -mat_block_size - size of the blocks to use
2310 .   -mat_mpi - use the parallel matrix data structures even on one processor
2311                (defaults to using SeqBAIJ format on one processor)
2312 
2313    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2315    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2316 
2317    Notes:
2318    The number of rows and columns must be divisible by blocksize.
2319    This matrix type does not support complex Hermitian operation.
2320 
2321    The user MUST specify either the local or global matrix dimensions
2322    (possibly both).
2323 
2324    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2326 
2327    If the *_nnz parameter is given then the *_nz parameter is ignored
2328 
2329    Storage Information:
2330    For a square global matrix we define each processor's diagonal portion
2331    to be its local rows and the corresponding columns (a square submatrix);
2332    each processor's off-diagonal portion encompasses the remainder of the
2333    local matrix (a rectangular submatrix).
2334 
2335    The user can specify preallocated storage for the diagonal part of
2336    the local submatrix with either d_nz or d_nnz (not both).  Set
2337    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2338    memory allocation.  Likewise, specify preallocated storage for the
2339    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2340 
2341    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2342    the figure below we depict these three local rows and all columns (0-11).
2343 
2344 .vb
2345            0 1 2 3 4 5 6 7 8 9 10 11
2346           --------------------------
2347    row 3  |. . . d d d o o o o  o  o
2348    row 4  |. . . d d d o o o o  o  o
2349    row 5  |. . . d d d o o o o  o  o
2350           --------------------------
2351 .ve
2352 
2353    Thus, any entries in the d locations are stored in the d (diagonal)
2354    submatrix, and any entries in the o locations are stored in the
2355    o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2357 
2358    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2359    plus the diagonal part of the d matrix,
2360    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2361    In general, for PDE problems in which most nonzeros are near the diagonal,
2362    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2363    or you will get TERRIBLE performance; see the users' manual chapter on
2364    matrices.
2365 
2366    Level: intermediate
2367 
2368 .keywords: matrix, block, aij, compressed row, sparse, parallel
2369 
2370 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2371 @*/
2372 
2373 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2374 {
2375   PetscErrorCode ierr;
2376   PetscMPIInt    size;
2377 
2378   PetscFunctionBegin;
2379   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2380   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2381   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2382   if (size > 1) {
2383     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2384     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2385   } else {
2386     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2387     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2388   }
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
/*
   Duplicates an MPISBAIJ matrix: builds a new matrix with the same type and
   layout, copies all of the parallel bookkeeping (block counts, ownership
   ranges, column map, garray), recreates the communication work vectors, and
   duplicates the local diagonal (A) and off-diagonal (B) blocks.  cpvalues is
   passed through to MatDuplicate() on A and B and controls whether numerical
   values are copied.
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the whole function table so any methods overridden on matin carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* the duplicate is marked assembled/preallocated; no further setup is needed */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy scalar bookkeeping: block dimensions and block counts */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;


  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance scratch state starts out empty in the duplicate */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: the table itself is not copied, only the tuning flags */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  /* deep-copy the global-to-local column map (table or flat array build) */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* deep-copy garray (global block-column indices of B's compacted columns) */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) alias pieces of slvec1 (slvec0): the first
     bs*mbs entries are the "a" (local) part, the remainder the "b" part */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* duplicate the two local blocks; cpvalues decides whether values are copied */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2503 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
/*
   Loads an MPISBAIJ matrix from a binary viewer.  Process 0 reads the file
   header, row lengths, column indices and values, keeps its own portion and
   ships each other process its portion with matched MPI_Send/MPI_Recv pairs.
   If the global size is not divisible by the block size, extra rows/columns
   are appended (padded with 1.0 on the diagonal) to make it divisible.
*/
PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;  /* unset block size defaults to 1 */

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* only rank 0 touches the file; header contents are broadcast below */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1];
  N    = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
  if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);

  if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);   /* rows needed to fill the last block */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);  /* spread the remainder over the first ranks */
    m   = mbs*bs;
  } else { /* User Set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  /* rowners[] holds block-row ownership ranges, browners[] the same scaled to scalar rows */
  ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];  /* prefix sum -> ownership offsets */
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;  /* padding rows have a single (diagonal) entry */
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;  /* padding entries are not on disk */
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      /* synthesize the diagonal column index for each padding row */
      for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) nz += locrowlens[i];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  /* mask[] marks block columns already seen in the current block row;
     masked1/masked2 record which entries to un-mark afterwards */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  /* global sizes include the padding rows/columns appended above */
  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values  */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;  /* unit diagonal on padding rows */
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      /* mycols/vals walk forward one row at a time */
      ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;  /* i == size-1 after the loop above */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2750 
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2753 /*XXXXX@
2754    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2755 
2756    Input Parameters:
2757 .  mat  - the matrix
2758 .  fact - factor
2759 
2760    Not Collective on Mat, each process can have a different hash factor
2761 
2762    Level: advanced
2763 
2764   Notes:
2765    This can also be set by the command line option: -mat_use_hash_table fact
2766 
2767 .keywords: matrix, hashtable, factor, HT
2768 
2769 .seealso: MatSetOption()
2770 @XXXXX*/
2771 
2772 
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
/*
   Computes, for each local row, the maximum absolute value over that row
   of the matrix, storing the result in v.  Because SBAIJ stores only the
   upper triangle, each entry of the off-diagonal block B contributes both
   to a local row maximum and to the row maximum of the process that owns
   the corresponding column; those column contributions are accumulated in
   work[] and forwarded with point-to-point sends to the higher ranks.
   The idx argument (row index of the max) is not supported.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* start from the row maxima of the local diagonal block A */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  /* raw CSR-of-blocks arrays of the off-diagonal block B */
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  /* NOTE(review): the last rank is skipped here — presumably its B block holds
     no upper-triangular off-diagonal entries; confirm against the SBAIJ layout */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;  /* number of blocks in block-row i */
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;                /* local col index */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            /* ba walks column-by-column through each bs*bs block */
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            /* same entry also contributes to the (remote) row equal to this column */
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  /* every rank below us sent one message; merge each into the local maxima */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2857 
2858 #undef __FUNCT__
2859 #define __FUNCT__ "MatSOR_MPISBAIJ"
2860 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2861 {
2862   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2863   PetscErrorCode    ierr;
2864   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2865   PetscScalar       *x,*ptr,*from;
2866   Vec               bb1;
2867   const PetscScalar *b;
2868 
2869   PetscFunctionBegin;
2870   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2871   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2872 
2873   if (flag == SOR_APPLY_UPPER) {
2874     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2875     PetscFunctionReturn(0);
2876   }
2877 
2878   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2879     if (flag & SOR_ZERO_INITIAL_GUESS) {
2880       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2881       its--;
2882     }
2883 
2884     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2885     while (its--) {
2886 
2887       /* lower triangular part: slvec0b = - B^T*xx */
2888       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2889 
2890       /* copy xx into slvec0a */
2891       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2892       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2893       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2894       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2895 
2896       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2897 
2898       /* copy bb into slvec1a */
2899       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2900       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2901       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2902       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2903 
2904       /* set slvec1b = 0 */
2905       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2906 
2907       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2908       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2909       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2910       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2911 
2912       /* upper triangular part: bb1 = bb1 - B*x */
2913       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2914 
2915       /* local diagonal sweep */
2916       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2917     }
2918     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2919   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2920     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2921   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2922     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2923   } else if (flag & SOR_EISENSTAT) {
2924     Vec               xx1;
2925     PetscBool         hasop;
2926     const PetscScalar *diag;
2927     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2928     PetscInt          i,n;
2929 
2930     if (!mat->xx1) {
2931       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2932       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2933     }
2934     xx1 = mat->xx1;
2935     bb1 = mat->bb1;
2936 
2937     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2938 
2939     if (!mat->diag) {
2940       /* this is wrong for same matrix with new nonzero values */
2941       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2942       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2943     }
2944     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2945 
2946     if (hasop) {
2947       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2948       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2949     } else {
2950       /*
2951           These two lines are replaced by code that may be a bit faster for a good compiler
2952       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2953       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2954       */
2955       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2956       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2957       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2958       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2959       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2960       if (omega == 1.0) {
2961         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2962         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2963       } else {
2964         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2965         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2966       }
2967       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2968       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2969       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2970       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2971     }
2972 
2973     /* multiply off-diagonal portion of matrix */
2974     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2975     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2976     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2977     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2978     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2979     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2980     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2981     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2982     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2983     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2984 
2985     /* local sweep */
2986     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2987     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2988   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2989   PetscFunctionReturn(0);
2990 }
2991 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
/*
   MatSOR_MPISBAIJ_2comm - SOR relaxation for an MPISBAIJ matrix, variant that
   uses the general column scatter Mvctx (two scatters per sweep: a forward
   scatter of xx and a reverse ADD scatter of the B^T*xx contributions).

   Only SOR_LOCAL_SYMMETRIC_SWEEP (optionally combined with
   SOR_ZERO_INITIAL_GUESS) is supported; any other MatSORType errors out.
   Because only the upper triangle of the symmetric matrix is stored, each
   sweep forms bb1 = bb - B^T*xx - B*xx before the local symmetric sweep on
   the diagonal block A.

   Input:  matin - the matrix; bb - right-hand side; omega - relaxation factor;
           flag - SOR type; fshift - diagonal shift; its - global sweeps;
           lits - local sweeps per global sweep.
   Output: xx - iterated solution (updated in place).
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess the off-process terms vanish, so the first
         sweep is purely local on the diagonal block */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    /* work vectors: lvec1 holds -B^T*xx in ghost space, bb1 the modified rhs */
    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* start communicating xx to the ghost (off-process column) space ... */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx
         (computed locally while the forward scatter is in flight) */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse-scatter the -B^T*xx contributions back into bb1 (ADD) ... */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x
         (negate the ghost values once so multadd subtracts; overlaps the
         reverse scatter above) */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
3037 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
         CSR format the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
     (NOTE(review): MatCreateMPIAIJWithSplitArrays() creates an AIJ matrix, not SBAIJ - confirm
     this cross-reference is the intended one.)

       The i and j indices are 0 based; the i array holds offsets into the local j array
     (i[0] = 0, and i[row+1]-i[row] is the number of entries in that local row).

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;


  PetscFunctionBegin;
  /* basic argument checks; NOTE(review): bs is not validated here even though the
     docs above state only bs == 1 is supported - confirm downstream validation */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  /* hand off the CSR data; the arrays are copied, not retained */
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3090 
3091 
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (symmetric block compressed-row storage).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

   Notes:
     NOTE(review): since SBAIJ stores a symmetric matrix, presumably only the
     upper triangular portion of each block row should be supplied in i/j/v,
     as with other SBAIJ assembly routines - confirm against the type-specific
     MatMPISBAIJSetPreallocationCSR_C implementation.

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered on B (no-op if absent) */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3121 
3122 #undef __FUNCT__
3123 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
3124 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3125 {
3126   PetscErrorCode ierr;
3127   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3128   PetscInt       *indx;
3129   PetscScalar    *values;
3130 
3131   PetscFunctionBegin;
3132   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3133   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3134     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3135     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3136     PetscInt       *bindx,rmax=a->rmax,j;
3137 
3138     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3139     mbs = m/bs; Nbs = N/cbs;
3140     if (n == PETSC_DECIDE) {
3141       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3142     }
3143     /* Check sum(n) = Nbs */
3144     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3145     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3146 
3147     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3148     rstart -= mbs;
3149 
3150     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3151     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3152     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3153     for (i=0; i<mbs; i++) {
3154       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3155       nnz = nnz/bs;
3156       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3157       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3158       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3159     }
3160     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3161     ierr = PetscFree(bindx);CHKERRQ(ierr);
3162 
3163     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3164     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3165     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3166     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3167     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3168     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3169   }
3170 
3171   /* numeric phase */
3172   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3173   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3174 
3175   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3176   for (i=0; i<m; i++) {
3177     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3178     Ii   = i + rstart;
3179     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3180     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3181   }
3182   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3183   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3184   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3185   PetscFunctionReturn(0);
3186 }
3187