/* /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7) */
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
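/*
   A minimal usage sketch for the store/retrieve pair above, for the common
   case of re-solving with a frozen nonzero pattern. 'J' is an illustrative
   placeholder (an assembled MATMPISBAIJ matrix), not a name from this file.
*/
#if 0
ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
ierr = MatStoreValues(J);CHKERRQ(ierr);    /* snapshot the numerical values */
/* ... modify values (e.g. add a diagonal shift), factor, solve ... */
ierr = MatRetrieveValues(J);CHKERRQ(ierr); /* restore the snapshot */
#endif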
35 
36 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
37   { \
38  \
39     brow = row/bs;  \
40     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
41     rmax = aimax[brow]; nrow = ailen[brow]; \
42     bcol = col/bs; \
43     ridx = row % bs; cidx = col % bs; \
44     low  = 0; high = nrow; \
45     while (high-low > 3) { \
46       t = (low+high)/2; \
47       if (rp[t] > bcol) high = t; \
48       else              low  = t; \
49     } \
50     for (_i=low; _i<high; _i++) { \
51       if (rp[_i] > bcol) break; \
52       if (rp[_i] == bcol) { \
53         bap = ap + bs2*_i + bs*cidx + ridx; \
54         if (addv == ADD_VALUES) *bap += value;  \
55         else                    *bap  = value;  \
56         goto a_noinsert; \
57       } \
58     } \
59     if (a->nonew == 1) goto a_noinsert; \
60     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
61     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
62     N = nrow++ - 1;  \
63     /* shift up all the later entries in this row */ \
64     for (ii=N; ii>=_i; ii--) { \
65       rp[ii+1] = rp[ii]; \
66       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
67     } \
68     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
69     rp[_i]                      = bcol;  \
70     ap[bs2*_i + bs*cidx + ridx] = value;  \
71     A->nonzerostate++;\
72 a_noinsert:; \
73     ailen[brow] = nrow; \
74   }
75 
76 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
77   { \
78     brow = row/bs;  \
79     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
80     rmax = bimax[brow]; nrow = bilen[brow]; \
81     bcol = col/bs; \
82     ridx = row % bs; cidx = col % bs; \
83     low  = 0; high = nrow; \
84     while (high-low > 3) { \
85       t = (low+high)/2; \
86       if (rp[t] > bcol) high = t; \
87       else              low  = t; \
88     } \
89     for (_i=low; _i<high; _i++) { \
90       if (rp[_i] > bcol) break; \
91       if (rp[_i] == bcol) { \
92         bap = ap + bs2*_i + bs*cidx + ridx; \
93         if (addv == ADD_VALUES) *bap += value;  \
94         else                    *bap  = value;  \
95         goto b_noinsert; \
96       } \
97     } \
98     if (b->nonew == 1) goto b_noinsert; \
99     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
100     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
101     N = nrow++ - 1;  \
102     /* shift up all the later entries in this row */ \
103     for (ii=N; ii>=_i; ii--) { \
104       rp[ii+1] = rp[ii]; \
105       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
106     } \
107     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
108     rp[_i]                      = bcol;  \
109     ap[bs2*_i + bs*cidx + ridx] = value;  \
110     B->nonzerostate++;\
111 b_noinsert:; \
112     bilen[brow] = nrow; \
113   }
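/*
   Both macros above implement the same insertion scheme on block row 'brow':
   a short bisection narrows the window of block-column indices to at most
   three entries, a linear scan finishes the search, and on a miss the row is
   grown (subject to the nonew flag), the trailing entries are shifted up by
   one bs*bs block, and the new block is zeroed before the scalar value is
   written at offset bs*cidx + ridx.
*/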
114 
115 /* Only add/insert a(i,j) with i<=j (blocks).
116    Any a(i,j) with i>j input by the user is ignored.
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPISBAIJ"
120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
123   MatScalar      value;
124   PetscBool      roworiented = baij->roworiented;
125   PetscErrorCode ierr;
126   PetscInt       i,j,row,col;
127   PetscInt       rstart_orig=mat->rmap->rstart;
128   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
129   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
130 
131   /* Some variables required by the MatSetValues_SeqSBAIJ_{A,B}_Private() macros */
132   Mat          A     = baij->A;
133   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
134   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
135   MatScalar    *aa   =a->a;
136 
137   Mat         B     = baij->B;
138   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
139   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
140   MatScalar   *ba   =b->a;
141 
142   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
143   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
144   MatScalar *ap,*bap;
145 
146   /* for stash */
147   PetscInt  n_loc, *in_loc = NULL;
148   MatScalar *v_loc = NULL;
149 
150   PetscFunctionBegin;
151   if (!baij->donotstash) {
152     if (n > baij->n_loc) {
153       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
154       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
155       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
156       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
157 
158       baij->n_loc = n;
159     }
160     in_loc = baij->in_loc;
161     v_loc  = baij->v_loc;
162   }
163 
164   for (i=0; i<m; i++) {
165     if (im[i] < 0) continue;
166 #if defined(PETSC_USE_DEBUG)
167     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
168 #endif
169     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
170       row = im[i] - rstart_orig;              /* local row index */
171       for (j=0; j<n; j++) {
172         if (im[i]/bs > in[j]/bs) {
173           if (a->ignore_ltriangular) {
174             continue;    /* ignore lower triangular blocks */
175           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
176         }
177         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
178           col  = in[j] - cstart_orig;         /* local col index */
179           brow = row/bs; bcol = col/bs;
180           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
181           if (roworiented) value = v[i*n+j];
182           else             value = v[i+j*m];
183           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
184           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
185         } else if (in[j] < 0) continue;
186 #if defined(PETSC_USE_DEBUG)
187         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
188 #endif
189         else {  /* off-diag entry (B) */
190           if (mat->was_assembled) {
191             if (!baij->colmap) {
192               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
193             }
194 #if defined(PETSC_USE_CTABLE)
195             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
196             col  = col - 1;
197 #else
198             col = baij->colmap[in[j]/bs] - 1;
199 #endif
200             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
201               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
202               col  =  in[j];
203               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
204               B    = baij->B;
205               b    = (Mat_SeqBAIJ*)(B)->data;
206               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
207               ba   = b->a;
208             } else col += in[j]%bs;
209           } else col = in[j];
210           if (roworiented) value = v[i*n+j];
211           else             value = v[i+j*m];
212           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
213           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
214         }
215       }
216     } else {  /* off processor entry */
217       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
218       if (!baij->donotstash) {
219         mat->assembled = PETSC_FALSE;
220         n_loc          = 0;
221         for (j=0; j<n; j++) {
222           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
223           in_loc[n_loc] = in[j];
224           if (roworiented) {
225             v_loc[n_loc] = v[i*n+j];
226           } else {
227             v_loc[n_loc] = v[j*m+i];
228           }
229           n_loc++;
230         }
231         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
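/*
   Caller-visible behavior of the routine above, sketched with scalar (non-
   blocked) indices. Assumes an existing MATMPISBAIJ matrix 'mat' and the
   usual 'ierr'; the indices are illustrative.
*/
#if 0
PetscInt    i = 0, j = 3;
PetscScalar v = 1.0;
ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr); /* kept: block row <= block col */
ierr = MatSetValues(mat,1,&j,1,&i,&v,ADD_VALUES);CHKERRQ(ierr); /* lower triangular, silently dropped */
#endif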
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
241 {
242   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
243   PetscErrorCode    ierr;
244   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
245   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
246   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
247   PetscBool         roworiented=a->roworiented;
248   const PetscScalar *value     = v;
249   MatScalar         *ap,*aa = a->a,*bap;
250 
251   PetscFunctionBegin;
252   if (col < row) {
253     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
254     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
255   }
256   rp   = aj + ai[row];
257   ap   = aa + bs2*ai[row];
258   rmax = imax[row];
259   nrow = ailen[row];
260   value = v;
261   low   = 0;
262   high  = nrow;
263 
264   while (high-low > 7) {
265     t = (low+high)/2;
266     if (rp[t] > col) high = t;
267     else             low  = t;
268   }
269   for (i=low; i<high; i++) {
270     if (rp[i] > col) break;
271     if (rp[i] == col) {
272       bap = ap +  bs2*i;
273       if (roworiented) {
274         if (is == ADD_VALUES) {
275           for (ii=0; ii<bs; ii++) {
276             for (jj=ii; jj<bs2; jj+=bs) {
277               bap[jj] += *value++;
278             }
279           }
280         } else {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=ii; jj<bs2; jj+=bs) {
283               bap[jj] = *value++;
284             }
285           }
286         }
287       } else {
288         if (is == ADD_VALUES) {
289           for (ii=0; ii<bs; ii++) {
290             for (jj=0; jj<bs; jj++) {
291               *bap++ += *value++;
292             }
293           }
294         } else {
295           for (ii=0; ii<bs; ii++) {
296             for (jj=0; jj<bs; jj++) {
297               *bap++  = *value++;
298             }
299           }
300         }
301       }
302       goto noinsert2;
303     }
304   }
305   if (nonew == 1) goto noinsert2;
306   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block index (%D, %D) in the matrix", orow, ocol);
307   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
308   N = nrow++ - 1; high++;
309   /* shift up all the later entries in this row */
310   for (ii=N; ii>=i; ii--) {
311     rp[ii+1] = rp[ii];
312     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
313   }
314   if (N >= i) {
315     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
316   }
317   rp[i] = col;
318   bap   = ap +  bs2*i;
319   if (roworiented) {
320     for (ii=0; ii<bs; ii++) {
321       for (jj=ii; jj<bs2; jj+=bs) {
322         bap[jj] = *value++;
323       }
324     }
325   } else {
326     for (ii=0; ii<bs; ii++) {
327       for (jj=0; jj<bs; jj++) {
328         *bap++ = *value++;
329       }
330     }
331   }
332   noinsert2:;
333   ailen[row] = nrow;
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
339 /*
340    This routine is exactly duplicated in mpibaij.c
341 */
342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
343 {
344   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
345   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
346   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
347   PetscErrorCode    ierr;
348   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
349   PetscBool         roworiented=a->roworiented;
350   const PetscScalar *value     = v;
351   MatScalar         *ap,*aa = a->a,*bap;
352 
353   PetscFunctionBegin;
354   rp   = aj + ai[row];
355   ap   = aa + bs2*ai[row];
356   rmax = imax[row];
357   nrow = ailen[row];
358   low  = 0;
359   high = nrow;
360   value = v;
361   while (high-low > 7) {
362     t = (low+high)/2;
363     if (rp[t] > col) high = t;
364     else             low  = t;
365   }
366   for (i=low; i<high; i++) {
367     if (rp[i] > col) break;
368     if (rp[i] == col) {
369       bap = ap +  bs2*i;
370       if (roworiented) {
371         if (is == ADD_VALUES) {
372           for (ii=0; ii<bs; ii++) {
373             for (jj=ii; jj<bs2; jj+=bs) {
374               bap[jj] += *value++;
375             }
376           }
377         } else {
378           for (ii=0; ii<bs; ii++) {
379             for (jj=ii; jj<bs2; jj+=bs) {
380               bap[jj] = *value++;
381             }
382           }
383         }
384       } else {
385         if (is == ADD_VALUES) {
386           for (ii=0; ii<bs; ii++,value+=bs) {
387             for (jj=0; jj<bs; jj++) {
388               bap[jj] += value[jj];
389             }
390             bap += bs;
391           }
392         } else {
393           for (ii=0; ii<bs; ii++,value+=bs) {
394             for (jj=0; jj<bs; jj++) {
395               bap[jj]  = value[jj];
396             }
397             bap += bs;
398           }
399         }
400       }
401       goto noinsert2;
402     }
403   }
404   if (nonew == 1) goto noinsert2;
405   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block index (%D, %D) in the matrix", orow, ocol);
406   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
407   N = nrow++ - 1; high++;
408   /* shift up all the later entries in this row */
409   for (ii=N; ii>=i; ii--) {
410     rp[ii+1] = rp[ii];
411     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
412   }
413   if (N >= i) {
414     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
415   }
416   rp[i] = col;
417   bap   = ap +  bs2*i;
418   if (roworiented) {
419     for (ii=0; ii<bs; ii++) {
420       for (jj=ii; jj<bs2; jj+=bs) {
421         bap[jj] = *value++;
422       }
423     }
424   } else {
425     for (ii=0; ii<bs; ii++) {
426       for (jj=0; jj<bs; jj++) {
427         *bap++ = *value++;
428       }
429     }
430   }
431   noinsert2:;
432   ailen[row] = nrow;
433   PetscFunctionReturn(0);
434 }
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
438 /*
439     This routine could be optimized by removing the need for the block copy below and passing stride information
440   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
441 */
442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
443 {
444   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
445   const MatScalar *value;
446   MatScalar       *barray     =baij->barray;
447   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
448   PetscErrorCode  ierr;
449   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
450   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
451   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
452 
453   PetscFunctionBegin;
454   if (!barray) {
455     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
456     baij->barray = barray;
457   }
458 
459   if (roworiented) {
460     stepval = (n-1)*bs;
461   } else {
462     stepval = (m-1)*bs;
463   }
464   for (i=0; i<m; i++) {
465     if (im[i] < 0) continue;
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
468 #endif
469     if (im[i] >= rstart && im[i] < rend) {
470       row = im[i] - rstart;
471       for (j=0; j<n; j++) {
472         if (im[i] > in[j]) {
473           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
474           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
475         }
476         /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy is not required */
477         if ((roworiented) && (n == 1)) {
478           barray = (MatScalar*) v + i*bs2;
479         } else if ((!roworiented) && (m == 1)) {
480           barray = (MatScalar*) v + j*bs2;
481         } else { /* Here a copy is required */
482           if (roworiented) {
483             value = v + i*(stepval+bs)*bs + j*bs;
484           } else {
485             value = v + j*(stepval+bs)*bs + i*bs;
486           }
487           for (ii=0; ii<bs; ii++,value+=stepval) {
488             for (jj=0; jj<bs; jj++) {
489               *barray++ = *value++;
490             }
491           }
492           barray -=bs2;
493         }
494 
495         if (in[j] >= cstart && in[j] < cend) {
496           col  = in[j] - cstart;
497           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
498         } else if (in[j] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
501 #endif
502         else {
503           if (mat->was_assembled) {
504             if (!baij->colmap) {
505               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
506             }
507 
508 #if defined(PETSC_USE_DEBUG)
509 #if defined(PETSC_USE_CTABLE)
510             { PetscInt data;
511               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
512               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
513             }
514 #else
515             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
516 #endif
517 #endif
518 #if defined(PETSC_USE_CTABLE)
519             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520             col  = (col - 1)/bs;
521 #else
522             col = (baij->colmap[in[j]] - 1)/bs;
523 #endif
524             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
526               col  = in[j];
527             }
528           } else col = in[j];
529           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
530         }
531       }
532     } else {
533       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
534       if (!baij->donotstash) {
535         if (roworiented) {
536           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
537         } else {
538           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
539         }
540       }
541     }
542   }
543   PetscFunctionReturn(0);
544 }
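/*
   A sketch of the blocked interface above: im[]/in[] are BLOCK indices and v
   supplies bs*bs values per block, row-oriented by default. The block size
   and values below are illustrative.
*/
#if 0
PetscInt    brow = 0, bcol = 1;   /* block row <= block col for sbaij */
PetscScalar blk[4] = {1.0,2.0,
                      3.0,4.0};   /* one 2x2 block, i.e. bs = 2 */
ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,blk,INSERT_VALUES);CHKERRQ(ierr);
#endif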
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
551   PetscErrorCode ierr;
552   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
553   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
554 
555   PetscFunctionBegin;
556   for (i=0; i<m; i++) {
557     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
558     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
559     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
560       row = idxm[i] - bsrstart;
561       for (j=0; j<n; j++) {
562         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
563         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
564         if (idxn[j] >= bscstart && idxn[j] < bscend) {
565           col  = idxn[j] - bscstart;
566           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567         } else {
568           if (!baij->colmap) {
569             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
570           }
571 #if defined(PETSC_USE_CTABLE)
572           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
573           data--;
574 #else
575           data = baij->colmap[idxn[j]/bs]-1;
576 #endif
577           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
578           else {
579             col  = data + idxn[j]%bs;
580             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581           }
582         }
583       }
584     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatNorm_MPISBAIJ"
591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
592 {
593   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscReal      sum[2],*lnorm2;
596 
597   PetscFunctionBegin;
598   if (baij->size == 1) {
599     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
600   } else {
601     if (type == NORM_FROBENIUS) {
602       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
603       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
605       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
606       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
607       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
608       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
609       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
610     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
611       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
612       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
613       PetscReal    *rsum,*rsum2,vabs;
614       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
615       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
616       MatScalar    *v;
617 
618       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
619       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
620       /* Amat */
621       v = amat->a; jj = amat->j;
622       for (brow=0; brow<mbs; brow++) {
623         grow = bs*(rstart + brow);
624         nz   = amat->i[brow+1] - amat->i[brow];
625         for (bcol=0; bcol<nz; bcol++) {
626           gcol = bs*(rstart + *jj); jj++;
627           for (col=0; col<bs; col++) {
628             for (row=0; row<bs; row++) {
629               vabs            = PetscAbsScalar(*v); v++;
630               rsum[gcol+col] += vabs;
631               /* off-diagonal block: by symmetry it also contributes to the row sum */
632               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
633             }
634           }
635         }
636       }
637       /* Bmat */
638       v = bmat->a; jj = bmat->j;
639       for (brow=0; brow<mbs; brow++) {
640         grow = bs*(rstart + brow);
641         nz = bmat->i[brow+1] - bmat->i[brow];
642         for (bcol=0; bcol<nz; bcol++) {
643           gcol = bs*garray[*jj]; jj++;
644           for (col=0; col<bs; col++) {
645             for (row=0; row<bs; row++) {
646               vabs            = PetscAbsScalar(*v); v++;
647               rsum[gcol+col] += vabs;
648               rsum[grow+row] += vabs;
649             }
650           }
651         }
652       }
653       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
654       *norm = 0.0;
655       for (col=0; col<mat->cmap->N; col++) {
656         if (rsum2[col] > *norm) *norm = rsum2[col];
657       }
658       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
659     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
660   }
661   PetscFunctionReturn(0);
662 }
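/*
   The Frobenius branch above exploits the symmetric storage: each process
   holds its diagonal block in baij->A (whose sequential MatNorm() must,
   consistent with the formula used here, account for the implicit lower
   triangle) and the strictly off-diagonal blocks in baij->B, which occur
   twice in the full matrix. Hence

      ||mat||_F^2 = sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 )

   which is exactly PetscSqrtReal(sum[0] + 2*sum[1]) after the Allreduce.
*/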
663 
664 #undef __FUNCT__
665 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
666 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscInt       nstash,reallocs;
671   InsertMode     addv;
672 
673   PetscFunctionBegin;
674   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
675 
676   /* make sure all processors are either in INSERTMODE or ADDMODE */
677   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
678   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
679   mat->insertmode = addv; /* in case this processor had no cache */
680 
681   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
682   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
683   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
684   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
685   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
686   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
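/*
   The scatters above are the first half of the usual two-phase assembly; the
   matching drain is in MatAssemblyEnd_MPISBAIJ() below. A typical caller
   sequence (indices illustrative, 'mat' an existing MATMPISBAIJ matrix):
*/
#if 0
PetscInt    row = 7, col = 9;     /* possibly owned by another process */
PetscScalar v   = 1.0;
ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif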
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
692 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
693 {
694   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
695   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
696   PetscErrorCode ierr;
697   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
698   PetscInt       *row,*col;
699   PetscBool      other_disassembled;
700   PetscMPIInt    n;
701   PetscBool      r1,r2,r3;
702   MatScalar      *val;
703   InsertMode     addv = mat->insertmode;
704 
705   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
706   PetscFunctionBegin;
707   if (!baij->donotstash &&  !mat->nooffprocentries) {
708     while (1) {
709       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
710       if (!flg) break;
711 
712       for (i=0; i<n;) {
713         /* Now identify the consecutive vals belonging to the same row */
714         for (j=i,rstart=row[j]; j<n; j++) {
715           if (row[j] != rstart) break;
716         }
717         if (j < n) ncols = j-i;
718         else       ncols = n-i;
719         /* Now assemble all these values with a single function call */
720         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
721         i    = j;
722       }
723     }
724     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
725     /* Now process the block-stash. Since the values are stashed column-oriented,
726        temporarily clear the roworiented flags, and restore the original
727        flags after the MatSetValuesBlocked_MPISBAIJ() calls */
728     r1 = baij->roworiented;
729     r2 = a->roworiented;
730     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
731 
732     baij->roworiented = PETSC_FALSE;
733     a->roworiented    = PETSC_FALSE;
734 
735     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
736     while (1) {
737       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
738       if (!flg) break;
739 
740       for (i=0; i<n;) {
741         /* Now identify the consecutive vals belonging to the same row */
742         for (j=i,rstart=row[j]; j<n; j++) {
743           if (row[j] != rstart) break;
744         }
745         if (j < n) ncols = j-i;
746         else       ncols = n-i;
747         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
748         i    = j;
749       }
750     }
751     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
752 
753     baij->roworiented = r1;
754     a->roworiented    = r2;
755 
756     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
757   }
758 
759   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
760   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
761 
762   /* determine if any processor has disassembled; if so we must
763      also disassemble ourselves, in order that we may reassemble. */
764   /*
765      if nonzero structure of submatrix B cannot change then we know that
766      no processor disassembled thus we can skip this stuff
767   */
768   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
769     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
770     if (mat->was_assembled && !other_disassembled) {
771       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
772     }
773   }
774 
775   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
776     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
777   }
778   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
779   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
780 
781   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
782 
783   baij->rowvalues = 0;
784 
785   /* update the nonzero state on the first final assembly; afterwards update it only if new nonzero locations are allowed in the matrix */
786   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
787     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
788     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
789   }
790   PetscFunctionReturn(0);
791 }
792 
793 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
794 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
795 #include <petscdraw.h>
796 #undef __FUNCT__
797 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
798 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
799 {
800   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
801   PetscErrorCode    ierr;
802   PetscInt          bs   = mat->rmap->bs;
803   PetscMPIInt       rank = baij->rank;
804   PetscBool         iascii,isdraw;
805   PetscViewer       sviewer;
806   PetscViewerFormat format;
807 
808   PetscFunctionBegin;
809   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
810   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
811   if (iascii) {
812     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
813     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
814       MatInfo info;
815       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
816       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
817       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
818       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
819       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
820       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
821       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
822       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
823       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
824       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
825       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
826       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
827       PetscFunctionReturn(0);
828     } else if (format == PETSC_VIEWER_ASCII_INFO) {
829       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
830       PetscFunctionReturn(0);
831     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
832       PetscFunctionReturn(0);
833     }
834   }
835 
836   if (isdraw) {
837     PetscDraw draw;
838     PetscBool isnull;
839     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
840     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
841   }
842 
843   {
844     /* assemble the entire matrix onto first processor. */
845     Mat          A;
846     Mat_SeqSBAIJ *Aloc;
847     Mat_SeqBAIJ  *Bloc;
848     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
849     MatScalar    *a;
850     const char   *matname;
851 
852     /* Should this be the same type as mat? */
853     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
854     if (!rank) {
855       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
856     } else {
857       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
858     }
859     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
860     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
861     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
862     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
863 
864     /* copy over the A part */
865     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
866     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
867     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
868 
869     for (i=0; i<mbs; i++) {
870       rvals[0] = bs*(baij->rstartbs + i);
871       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
872       for (j=ai[i]; j<ai[i+1]; j++) {
873         col = (baij->cstartbs+aj[j])*bs;
874         for (k=0; k<bs; k++) {
875           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
876           col++;
877           a += bs;
878         }
879       }
880     }
881     /* copy over the B part */
882     Bloc = (Mat_SeqBAIJ*)baij->B->data;
883     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
884     for (i=0; i<mbs; i++) {
885 
886       rvals[0] = bs*(baij->rstartbs + i);
887       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
888       for (j=ai[i]; j<ai[i+1]; j++) {
889         col = baij->garray[aj[j]]*bs;
890         for (k=0; k<bs; k++) {
891           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
892           col++;
893           a += bs;
894         }
895       }
896     }
897     ierr = PetscFree(rvals);CHKERRQ(ierr);
898     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
899     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
900     /*
901        Everyone has to call to draw the matrix since the graphics waits are
902        synchronized across all processors that share the PetscDraw object
903     */
904     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
905     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
906     if (!rank) {
907       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
908       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
909     }
910     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
911     ierr = MatDestroy(&A);CHKERRQ(ierr);
912   }
913   PetscFunctionReturn(0);
914 }
915 
916 #undef __FUNCT__
917 #define __FUNCT__ "MatView_MPISBAIJ"
918 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
919 {
920   PetscErrorCode ierr;
921   PetscBool      iascii,isdraw,issocket,isbinary;
922 
923   PetscFunctionBegin;
924   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
925   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
926   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
927   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
928   if (iascii || isdraw || issocket || isbinary) {
929     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatDestroy_MPISBAIJ"
936 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
937 {
938   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
939   PetscErrorCode ierr;
940 
941   PetscFunctionBegin;
942 #if defined(PETSC_USE_LOG)
943   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
944 #endif
945   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
946   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
947   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
948   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
949 #if defined(PETSC_USE_CTABLE)
950   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
951 #else
952   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
953 #endif
954   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
955   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
956   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
957   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
958   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
959   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
960   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
961   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
962   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
963   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
964   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
965   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
966   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
967   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
968   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
969 #if defined(PETSC_USE_REAL_MAT_SINGLE)
970   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
971 #endif
972   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
973   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
974   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
975   ierr = PetscFree(mat->data);CHKERRQ(ierr);
976 
977   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
978   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
979   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
980   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
981   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
982   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
983 #if defined(PETSC_HAVE_ELEMENTAL)
984   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
985 #endif
986   PetscFunctionReturn(0);
987 }
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
991 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
992 {
993   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
994   PetscErrorCode ierr;
995   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
996   PetscScalar    *x,*from;
997 
998   PetscFunctionBegin;
999   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1000   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1001 
1002   /* diagonal part */
1003   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1004   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1005 
1006   /* subdiagonal part */
1007   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1008 
1009   /* copy x into the vec slvec0 */
1010   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1011   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1012 
1013   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1014   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1015   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1016 
1017   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1018   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1019   /* superdiagonal part */
1020   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatMult_MPISBAIJ"
1026 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1027 {
1028   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1029   PetscErrorCode    ierr;
1030   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1031   PetscScalar       *from;
1032   const PetscScalar *x;
1033 
1034   PetscFunctionBegin;
1035   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1036   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1037 
1038   /* diagonal part */
1039   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1040   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1041 
1042   /* subdiagonal part */
1043   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1044 
1045   /* copy x into the vec slvec0 */
1046   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1047   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1048 
1049   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1050   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1051   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1052 
1053   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055   /* superdiagonal part */
1056   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1057   PetscFunctionReturn(0);
1058 }
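/*
   Both mult routines above use a one-communication scheme: with D the
   symmetric diagonal block stored in a->A and B the strictly off-diagonal
   part stored in a->B,

      y = D*x_local + B*x_ghost + (remotely owned pieces of B^T*x_local)

   The local products B^T*x (B^H*x in the Hermitian variant) are packed into
   slvec0b next to a copy of x_local in slvec0, so a single forward scatter
   with ADD_VALUES simultaneously delivers the transpose contributions to
   their owners and gathers the ghost values into slvec1b.
*/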
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1062 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1063 {
1064   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1065   PetscErrorCode ierr;
1066   PetscInt       nt;
1067 
1068   PetscFunctionBegin;
1069   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1070   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1071 
1072   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1073   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1074 
1075   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1076   /* do diagonal part */
1077   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1078   /* do superdiagonal part */
1079   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1080   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1081   /* do subdiagonal part */
1082   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1083   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1090 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1091 {
1092   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1093   PetscErrorCode    ierr;
1094   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1095   PetscScalar       *from,zero=0.0;
1096   const PetscScalar *x;
1097 
1098   PetscFunctionBegin;
1099   /*
1100   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1101   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1102   */
1103   /* diagonal part */
1104   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1105   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1106 
1107   /* subdiagonal part */
1108   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1109 
1110   /* copy x into the vec slvec0 */
1111   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1112   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1113   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1114   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1115 
1116   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119 
1120   /* superdiagonal part */
1121   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1127 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1128 {
1129   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1130   PetscErrorCode ierr;
1131 
1132   PetscFunctionBegin;
1133   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1134   /* do diagonal part */
1135   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1136   /* do superdiagonal part */
1137   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1138   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1139 
1140   /* do subdiagonal part */
1141   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1142   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 /*
1148   This only works correctly for square matrices where the subblock a->A is the
1149   diagonal block
1150 */
1151 #undef __FUNCT__
1152 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1153 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1154 {
1155   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1156   PetscErrorCode ierr;
1157 
1158   PetscFunctionBegin;
1159   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1160   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatScale_MPISBAIJ"
1166 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1167 {
1168   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1169   PetscErrorCode ierr;
1170 
1171   PetscFunctionBegin;
1172   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1173   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1179 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1180 {
1181   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1182   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1183   PetscErrorCode ierr;
1184   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1185   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1186   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1187 
1188   PetscFunctionBegin;
1189   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1190   mat->getrowactive = PETSC_TRUE;
1191 
1192   if (!mat->rowvalues && (idx || v)) {
1193     /*
1194         allocate enough space to hold information from the longest row.
1195     */
1196     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1197     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1198     PetscInt     max = 1,mbs = mat->mbs,tmp;
1199     for (i=0; i<mbs; i++) {
1200       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1201       if (max < tmp) max = tmp;
1202     }
1203     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1204   }
1205 
1206   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1207   lrow = row - brstart;  /* local row index */
1208 
1209   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1210   if (!v)   {pvA = 0; pvB = 0;}
1211   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1212   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1213   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1214   nztot = nzA + nzB;
1215 
1216   cmap = mat->garray;
1217   if (v  || idx) {
1218     if (nztot) {
1219       /* Sort by increasing column numbers, assuming A and B already sorted */
1220       PetscInt imark = -1;
1221       if (v) {
1222         *v = v_p = mat->rowvalues;
1223         for (i=0; i<nzB; i++) {
1224           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1225           else break;
1226         }
1227         imark = i;
1228         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1229         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1230       }
1231       if (idx) {
1232         *idx = idx_p = mat->rowindices;
1233         if (imark > -1) {
1234           for (i=0; i<imark; i++) {
1235             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1236           }
1237         } else {
1238           for (i=0; i<nzB; i++) {
1239             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1240             else break;
1241           }
1242           imark = i;
1243         }
1244         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1245         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1246       }
1247     } else {
1248       if (idx) *idx = 0;
1249       if (v)   *v   = 0;
1250     }
1251   }
1252   *nz  = nztot;
1253   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1254   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1260 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1263 
1264   PetscFunctionBegin;
1265   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1266   baij->getrowactive = PETSC_FALSE;
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1272 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1273 {
1274   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1275   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1276 
1277   PetscFunctionBegin;
1278   aA->getrow_utriangular = PETSC_TRUE;
1279   PetscFunctionReturn(0);
1280 }
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1283 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1284 {
1285   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1286   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1287 
1288   PetscFunctionBegin;
1289   aA->getrow_utriangular = PETSC_FALSE;
1290   PetscFunctionReturn(0);
1291 }
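/*
   A sketch of the expected call sequence for the row routines above; the row
   must be locally owned, and for sbaij matrices the upper-triangular guard
   must bracket the MatGetRow()/MatRestoreRow() pair:
*/
#if 0
const PetscInt    *cols;
const PetscScalar *vals;
PetscInt          ncols,row = 0;  /* assume global row 0 is locally owned */
ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
/* ... read cols[0..ncols-1] and vals[0..ncols-1] ... */
ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
#endif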
1292 
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1295 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1296 {
1297   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1298   PetscErrorCode ierr;
1299 
1300   PetscFunctionBegin;
1301   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1302   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1308 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1309 {
1310   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1311   PetscErrorCode ierr;
1312 
1313   PetscFunctionBegin;
1314   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1315   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 /* Check whether isrow is a subset of iscol_local; called by MatGetSubMatrix_MPISBAIJ()
1320    Input: isrow       - distributed (parallel),
1321           iscol_local - locally owned (seq)
1322 */
1323 #undef __FUNCT__
1324 #define __FUNCT__ "ISEqual_private"
1325 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1326 {
1327   PetscErrorCode ierr;
1328   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1329   const PetscInt *ptr1,*ptr2;
1330 
1331   PetscFunctionBegin;
1332   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1333   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1334   if (sz1 > sz2) {
1335     *flg = PETSC_FALSE;
1336     PetscFunctionReturn(0);
1337   }
1338 
1339   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1340   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1341 
1342   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1343   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1344   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1345   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1346   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1347   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1348 
1349   nmatch=0;
1350   k     = 0;
1351   for (i=0; i<sz1; i++){
1352     for (j=k; j<sz2; j++){
1353       if (a1[i] == a2[j]) {
1354         k = j; nmatch++;
1355         break;
1356       }
1357     }
1358   }
1359   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1360   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1361   ierr = PetscFree(a1);CHKERRQ(ierr);
1362   ierr = PetscFree(a2);CHKERRQ(ierr);
1363   if (nmatch < sz1) {
1364     *flg = PETSC_FALSE;
1365   } else {
1366     *flg = PETSC_TRUE;
1367   }
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1373 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1374 {
1375   PetscErrorCode ierr;
1376   IS             iscol_local;
1377   PetscInt       csize;
1378   PetscBool      isequal;
1379 
1380   PetscFunctionBegin;
1381   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1382   if (call == MAT_REUSE_MATRIX) {
1383     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1384     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1385   } else {
1386     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1387     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1388     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1389   }
1390 
1391   /* now call MatGetSubMatrix_MPIBAIJ_Private() */
1392   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1393   if (call == MAT_INITIAL_MATRIX) {
1394     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1395     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1396   }
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1402 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1403 {
1404   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1405   PetscErrorCode ierr;
1406 
1407   PetscFunctionBegin;
1408   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1409   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1415 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1416 {
1417   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1418   Mat            A  = a->A,B = a->B;
1419   PetscErrorCode ierr;
1420   PetscReal      isend[5],irecv[5];
1421 
1422   PetscFunctionBegin;
1423   info->block_size = (PetscReal)matin->rmap->bs;
1424 
1425   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1426 
1427   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1428   isend[3] = info->memory;  isend[4] = info->mallocs;
1429 
1430   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1431 
1432   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1433   isend[3] += info->memory;  isend[4] += info->mallocs;
1434   if (flag == MAT_LOCAL) {
1435     info->nz_used      = isend[0];
1436     info->nz_allocated = isend[1];
1437     info->nz_unneeded  = isend[2];
1438     info->memory       = isend[3];
1439     info->mallocs      = isend[4];
1440   } else if (flag == MAT_GLOBAL_MAX) {
1441     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1442 
1443     info->nz_used      = irecv[0];
1444     info->nz_allocated = irecv[1];
1445     info->nz_unneeded  = irecv[2];
1446     info->memory       = irecv[3];
1447     info->mallocs      = irecv[4];
1448   } else if (flag == MAT_GLOBAL_SUM) {
1449     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1450 
1451     info->nz_used      = irecv[0];
1452     info->nz_allocated = irecv[1];
1453     info->nz_unneeded  = irecv[2];
1454     info->memory       = irecv[3];
1455     info->mallocs      = irecv[4];
1456   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1457   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1458   info->fill_ratio_needed = 0;
1459   info->factor_mallocs    = 0;
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1465 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1466 {
1467   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1468   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1469   PetscErrorCode ierr;
1470 
1471   PetscFunctionBegin;
1472   switch (op) {
1473   case MAT_NEW_NONZERO_LOCATIONS:
1474   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1475   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1476   case MAT_KEEP_NONZERO_PATTERN:
1477   case MAT_NEW_NONZERO_LOCATION_ERR:
1478     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1479     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1480     break;
1481   case MAT_ROW_ORIENTED:
1482     a->roworiented = flg;
1483 
1484     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1485     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1486     break;
1487   case MAT_NEW_DIAGONALS:
1488     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1489     break;
1490   case MAT_IGNORE_OFF_PROC_ENTRIES:
1491     a->donotstash = flg;
1492     break;
1493   case MAT_USE_HASH_TABLE:
1494     a->ht_flag = flg;
1495     break;
1496   case MAT_HERMITIAN:
1497     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1498     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1499 
1500     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1501     break;
1502   case MAT_SPD:
1503     A->spd_set = PETSC_TRUE;
1504     A->spd     = flg;
1505     if (flg) {
1506       A->symmetric                  = PETSC_TRUE;
1507       A->structurally_symmetric     = PETSC_TRUE;
1508       A->symmetric_set              = PETSC_TRUE;
1509       A->structurally_symmetric_set = PETSC_TRUE;
1510     }
1511     break;
1512   case MAT_SYMMETRIC:
1513     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1514     break;
1515   case MAT_STRUCTURALLY_SYMMETRIC:
1516     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1517     break;
1518   case MAT_SYMMETRY_ETERNAL:
1519     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1520     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1521     break;
1522   case MAT_IGNORE_LOWER_TRIANGULAR:
1523     aA->ignore_ltriangular = flg;
1524     break;
1525   case MAT_ERROR_LOWER_TRIANGULAR:
1526     aA->ignore_ltriangular = flg;
1527     break;
1528   case MAT_GETROW_UPPERTRIANGULAR:
1529     aA->getrow_utriangular = flg;
1530     break;
1531   default:
1532     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1533   }
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1539 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1540 {
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1545     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1546   }
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1552 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1553 {
1554   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1555   Mat            a     = baij->A, b=baij->B;
1556   PetscErrorCode ierr;
1557   PetscInt       nv,m,n;
1558   PetscBool      flg;
1559 
1560   PetscFunctionBegin;
1561   if (ll != rr) {
1562     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1563     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1564   }
1565   if (!ll) PetscFunctionReturn(0);
1566 
1567   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1568   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1569 
1570   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1571   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1572 
1573   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574 
1575   /* left diagonalscale the off-diagonal part */
1576   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1577 
1578   /* scale the diagonal part */
1579   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1580 
1581   /* right diagonalscale the off-diagonal part */
1582   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1583   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1589 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1590 {
1591   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1592   PetscErrorCode ierr;
1593 
1594   PetscFunctionBegin;
1595   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1600 
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatEqual_MPISBAIJ"
1603 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1604 {
1605   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1606   Mat            a,b,c,d;
1607   PetscBool      flg;
1608   PetscErrorCode ierr;
1609 
1610   PetscFunctionBegin;
1611   a = matA->A; b = matA->B;
1612   c = matB->A; d = matB->B;
1613 
1614   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1615   if (flg) {
1616     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1617   }
1618   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatCopy_MPISBAIJ"
1624 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1625 {
1626   PetscErrorCode ierr;
1627   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1628   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1629 
1630   PetscFunctionBegin;
1631   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1632   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1633     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1634     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1635     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1636   } else {
1637     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1638     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1639   }
1640   PetscFunctionReturn(0);
1641 }
1642 
1643 #undef __FUNCT__
1644 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1645 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1646 {
1647   PetscErrorCode ierr;
1648 
1649   PetscFunctionBegin;
1650   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 #undef __FUNCT__
1655 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1656 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1657 {
1658   PetscErrorCode ierr;
1659   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1660   PetscBLASInt   bnz,one=1;
1661   Mat_SeqSBAIJ   *xa,*ya;
1662   Mat_SeqBAIJ    *xb,*yb;
1663 
1664   PetscFunctionBegin;
1665   if (str == SAME_NONZERO_PATTERN) {
1666     PetscScalar alpha = a;
1667     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1668     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1669     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1670     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1671     xb   = (Mat_SeqBAIJ*)xx->B->data;
1672     yb   = (Mat_SeqBAIJ*)yy->B->data;
1673     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1674     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1675     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1676   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1677     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1678     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1679     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1680   } else {
1681     Mat      B;
1682     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1683     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1684     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1685     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1686     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1687     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1688     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1689     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1690     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1691     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1692     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1693     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1694     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1695     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1696     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1697     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1698     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1699     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1700     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1701     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1702   }
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1708 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1709 {
1710   PetscErrorCode ierr;
1711   PetscInt       i;
1712   PetscBool      flg,sorted;
1713 
1714   PetscFunctionBegin;
1715   for (i = 0; i < n; i++) {
1716     ierr = ISSorted(irow[i],&sorted);CHKERRQ(ierr);
1717     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row index set %D not sorted",i);
1718     ierr = ISSorted(icol[i],&sorted);CHKERRQ(ierr);
1719     if (!sorted) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Column index set %D not sorted",i);
1720   }
1721   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1722   for (i=0; i<n; i++) {
1723     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1724     if (!flg) { /* *B[i] is non-symmetric, set flag */
1725       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1726     }
1727   }
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatShift_MPISBAIJ"
1733 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1734 {
1735   PetscErrorCode ierr;
1736   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1737   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data; Mat_SeqBAIJ *bij = (Mat_SeqBAIJ*)maij->B->data; /* the off-diagonal part B is stored as SeqBAIJ */
1738 
1739   PetscFunctionBegin;
1740   if (!aij->nz && !bij->nz) {
1741     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1742   }
1743   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 /* -------------------------------------------------------------------*/
1748 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1749                                        MatGetRow_MPISBAIJ,
1750                                        MatRestoreRow_MPISBAIJ,
1751                                        MatMult_MPISBAIJ,
1752                                /*  4*/ MatMultAdd_MPISBAIJ,
1753                                        MatMult_MPISBAIJ,       /* transpose versions are the same as non-transpose */
1754                                        MatMultAdd_MPISBAIJ,
1755                                        0,
1756                                        0,
1757                                        0,
1758                                /* 10*/ 0,
1759                                        0,
1760                                        0,
1761                                        MatSOR_MPISBAIJ,
1762                                        MatTranspose_MPISBAIJ,
1763                                /* 15*/ MatGetInfo_MPISBAIJ,
1764                                        MatEqual_MPISBAIJ,
1765                                        MatGetDiagonal_MPISBAIJ,
1766                                        MatDiagonalScale_MPISBAIJ,
1767                                        MatNorm_MPISBAIJ,
1768                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1769                                        MatAssemblyEnd_MPISBAIJ,
1770                                        MatSetOption_MPISBAIJ,
1771                                        MatZeroEntries_MPISBAIJ,
1772                                /* 24*/ 0,
1773                                        0,
1774                                        0,
1775                                        0,
1776                                        0,
1777                                /* 29*/ MatSetUp_MPISBAIJ,
1778                                        0,
1779                                        0,
1780                                        0,
1781                                        0,
1782                                /* 34*/ MatDuplicate_MPISBAIJ,
1783                                        0,
1784                                        0,
1785                                        0,
1786                                        0,
1787                                /* 39*/ MatAXPY_MPISBAIJ,
1788                                        MatGetSubMatrices_MPISBAIJ,
1789                                        MatIncreaseOverlap_MPISBAIJ,
1790                                        MatGetValues_MPISBAIJ,
1791                                        MatCopy_MPISBAIJ,
1792                                /* 44*/ 0,
1793                                        MatScale_MPISBAIJ,
1794                                        MatShift_MPISBAIJ,
1795                                        0,
1796                                        0,
1797                                /* 49*/ 0,
1798                                        0,
1799                                        0,
1800                                        0,
1801                                        0,
1802                                /* 54*/ 0,
1803                                        0,
1804                                        MatSetUnfactored_MPISBAIJ,
1805                                        0,
1806                                        MatSetValuesBlocked_MPISBAIJ,
1807                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1808                                        0,
1809                                        0,
1810                                        0,
1811                                        0,
1812                                /* 64*/ 0,
1813                                        0,
1814                                        0,
1815                                        0,
1816                                        0,
1817                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1818                                        0,
1819                                        0,
1820                                        0,
1821                                        0,
1822                                /* 74*/ 0,
1823                                        0,
1824                                        0,
1825                                        0,
1826                                        0,
1827                                /* 79*/ 0,
1828                                        0,
1829                                        0,
1830                                        0,
1831                                        MatLoad_MPISBAIJ,
1832                                /* 84*/ 0,
1833                                        0,
1834                                        0,
1835                                        0,
1836                                        0,
1837                                /* 89*/ 0,
1838                                        0,
1839                                        0,
1840                                        0,
1841                                        0,
1842                                /* 94*/ 0,
1843                                        0,
1844                                        0,
1845                                        0,
1846                                        0,
1847                                /* 99*/ 0,
1848                                        0,
1849                                        0,
1850                                        0,
1851                                        0,
1852                                /*104*/ 0,
1853                                        MatRealPart_MPISBAIJ,
1854                                        MatImaginaryPart_MPISBAIJ,
1855                                        MatGetRowUpperTriangular_MPISBAIJ,
1856                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1857                                /*109*/ 0,
1858                                        0,
1859                                        0,
1860                                        0,
1861                                        0,
1862                                /*114*/ 0,
1863                                        0,
1864                                        0,
1865                                        0,
1866                                        0,
1867                                /*119*/ 0,
1868                                        0,
1869                                        0,
1870                                        0,
1871                                        0,
1872                                /*124*/ 0,
1873                                        0,
1874                                        0,
1875                                        0,
1876                                        0,
1877                                /*129*/ 0,
1878                                        0,
1879                                        0,
1880                                        0,
1881                                        0,
1882                                /*134*/ 0,
1883                                        0,
1884                                        0,
1885                                        0,
1886                                        0,
1887                                /*139*/ 0,
1888                                        0,
1889                                        0,
1890                                        0,
1891                                        0,
1892                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1893 };
1894 
1895 #undef __FUNCT__
1896 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1897 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1898 {
1899   PetscFunctionBegin;
1900   *a = ((Mat_MPISBAIJ*)A->data)->A;
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1906 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1907 {
1908   Mat_MPISBAIJ   *b;
1909   PetscErrorCode ierr;
1910   PetscInt       i,mbs,Mbs;
1911 
1912   PetscFunctionBegin;
1913   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1914   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1915   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1916   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1917 
1918   b   = (Mat_MPISBAIJ*)B->data;
1919   mbs = B->rmap->n/bs;
1920   Mbs = B->rmap->N/bs;
1921   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1922 
1923   B->rmap->bs = bs;
1924   b->bs2      = bs*bs;
1925   b->mbs      = mbs;
1926   b->Mbs      = Mbs;
1927   b->nbs      = B->cmap->n/bs;
1928   b->Nbs      = B->cmap->N/bs;
1929 
1930   for (i=0; i<=b->size; i++) {
1931     b->rangebs[i] = B->rmap->range[i]/bs;
1932   }
1933   b->rstartbs = B->rmap->rstart/bs;
1934   b->rendbs   = B->rmap->rend/bs;
1935 
1936   b->cstartbs = B->cmap->rstart/bs;
1937   b->cendbs   = B->cmap->rend/bs;
1938 
1939   if (!B->preallocated) {
1940     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1941     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1942     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1943     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1944     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1945     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1946     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1947     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1948     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1949   }
1950 
1951   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1952   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1953 
1954   B->preallocated = PETSC_TRUE;
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #undef __FUNCT__
1959 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1960 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1961 {
1962   PetscInt       m,rstart,cstart,cend;
1963   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1964   const PetscInt *JJ    =0;
1965   PetscScalar    *values=0;
1966   PetscErrorCode ierr;
1967 
1968   PetscFunctionBegin;
1969   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1970   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1971   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1972   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1973   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1974   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1975   m      = B->rmap->n/bs;
1976   rstart = B->rmap->rstart/bs;
1977   cstart = B->cmap->rstart/bs;
1978   cend   = B->cmap->rend/bs;
1979 
1980   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1981   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1982   for (i=0; i<m; i++) {
1983     nz = ii[i+1] - ii[i];
1984     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1985     nz_max = PetscMax(nz_max,nz);
1986     JJ     = jj + ii[i];
1987     for (j=0; j<nz; j++) {
1988       if (*JJ >= cstart) break;
1989       JJ++;
1990     }
1991     d = 0;
1992     for (; j<nz; j++) {
1993       if (*JJ++ >= cend) break;
1994       d++;
1995     }
1996     d_nnz[i] = d;
1997     o_nnz[i] = nz - d;
1998   }
1999   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2000   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2001 
2002   values = (PetscScalar*)V;
2003   if (!values) {
2004     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2005     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2006   }
2007   for (i=0; i<m; i++) {
2008     PetscInt          row    = i + rstart;
2009     PetscInt          ncols  = ii[i+1] - ii[i];
2010     const PetscInt    *icols = jj + ii[i];
2011     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2012     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2013   }
2014 
2015   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2016   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2017   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2018   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 /*MC
2023    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2024    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2025    the matrix is stored.
2026 
2027   For complex numbers, by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2028   can call MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE);
2029 
2030    Options Database Keys:
2031 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2032 
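   Example (a minimal sketch, assuming comm, N, bs, d_nz and o_nz are defined by the caller;
   error checking with CHKERRQ() omitted):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
   /* set entries in the upper triangular part only, then assemble */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
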
2033   Level: beginner
2034 
2035 .seealso: MatCreateMPISBAIJ
2036 M*/
2037 
2038 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2039 
2040 #undef __FUNCT__
2041 #define __FUNCT__ "MatCreate_MPISBAIJ"
2042 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2043 {
2044   Mat_MPISBAIJ   *b;
2045   PetscErrorCode ierr;
2046   PetscBool      flg = PETSC_FALSE;
2047 
2048   PetscFunctionBegin;
2049   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2050   B->data = (void*)b;
2051   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2052 
2053   B->ops->destroy = MatDestroy_MPISBAIJ;
2054   B->ops->view    = MatView_MPISBAIJ;
2055   B->assembled    = PETSC_FALSE;
2056   B->insertmode   = NOT_SET_VALUES;
2057 
2058   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2059   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2060 
2061   /* build local table of row and column ownerships */
2062   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2063 
2064   /* build cache for off array entries formed */
2065   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2066 
2067   b->donotstash  = PETSC_FALSE;
2068   b->colmap      = NULL;
2069   b->garray      = NULL;
2070   b->roworiented = PETSC_TRUE;
2071 
2072   /* stuff used in block assembly */
2073   b->barray = 0;
2074 
2075   /* stuff used for matrix vector multiply */
2076   b->lvec    = 0;
2077   b->Mvctx   = 0;
2078   b->slvec0  = 0;
2079   b->slvec0b = 0;
2080   b->slvec1  = 0;
2081   b->slvec1a = 0;
2082   b->slvec1b = 0;
2083   b->sMvctx  = 0;
2084 
2085   /* stuff for MatGetRow() */
2086   b->rowindices   = 0;
2087   b->rowvalues    = 0;
2088   b->getrowactive = PETSC_FALSE;
2089 
2090   /* hash table stuff */
2091   b->ht           = 0;
2092   b->hd           = 0;
2093   b->ht_size      = 0;
2094   b->ht_flag      = PETSC_FALSE;
2095   b->ht_fact      = 0;
2096   b->ht_total_ct  = 0;
2097   b->ht_insert_ct = 0;
2098 
2099   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
2100   b->ijonly = PETSC_FALSE;
2101 
2102   b->in_loc = 0;
2103   b->v_loc  = 0;
2104   b->n_loc  = 0;
2105 
2106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
2112 #if defined(PETSC_HAVE_ELEMENTAL)
2113   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2114 #endif
2115 
2116   B->symmetric                  = PETSC_TRUE;
2117   B->structurally_symmetric     = PETSC_TRUE;
2118   B->symmetric_set              = PETSC_TRUE;
2119   B->structurally_symmetric_set = PETSC_TRUE;
2120 
2121   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2122   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2123   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2124   if (flg) {
2125     PetscReal fact = 1.39;
2126     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2127     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2128     if (fact <= 1.0) fact = 1.39;
2129     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2130     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2131   }
2132   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 /*MC
2137    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2138 
2139    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2140    and MATMPISBAIJ otherwise.
2141 
2142    Options Database Keys:
2143 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2144 
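   Example (a sketch; A, comm and N are placeholders, error checking omitted):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
   MatSetFromOptions(A);   /* -mat_type sbaij selects this type at runtime */
   MatSetUp(A);
.ve
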
2145   Level: beginner
2146 
2147 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2148 M*/
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
2152 /*@C
2153    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2154    the user should preallocate the matrix storage by setting the parameters
2155    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2156    performance can be increased by more than a factor of 50.
2157 
2158    Collective on Mat
2159 
2160    Input Parameters:
2161 +  B - the matrix
2162 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2163           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2164 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2165            submatrix  (same for all local rows)
2166 .  d_nnz - array containing the number of block nonzeros in the various block rows
2167            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2168            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2169            for the diagonal entry and set a value even if it is zero.
2170 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2171            submatrix (same for all local rows).
2172 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2173            off-diagonal portion of the local submatrix that is right of the diagonal
2174            (possibly different for each block row) or NULL.
2175 
2176 
2177    Options Database Keys:
2178 .   -mat_no_unroll - uses code that does not unroll the loops in the
2179                      block calculations (much slower)
2180 .   -mat_block_size - size of the blocks to use
2181 
2182    Notes:
2183 
2184    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2185    then it must be used on all processors that share the object for that argument.
2186 
2187    If the *_nnz parameter is given then the *_nz parameter is ignored
2188 
2189    Storage Information:
2190    For a square global matrix we define each processor's diagonal portion
2191    to be its local rows and the corresponding columns (a square submatrix);
2192    each processor's off-diagonal portion encompasses the remainder of the
2193    local matrix (a rectangular submatrix).
2194 
2195    The user can specify preallocated storage for the diagonal part of
2196    the local submatrix with either d_nz or d_nnz (not both).  Set
2197    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2198    memory allocation.  Likewise, specify preallocated storage for the
2199    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2200 
2201    You can call MatGetInfo() to get information on how effective the preallocation was;
2202    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2203    You can also run with the option -info and look for messages with the string
2204    malloc in them to see if additional memory allocation was needed.
2205 
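   For example (a sketch; B is the preallocated matrix):
.vb
   MatInfo info;
   MatGetInfo(B,MAT_LOCAL,&info);
   PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
               (double)info.mallocs,(double)info.nz_allocated,(double)info.nz_used,(double)info.nz_unneeded);
.ve
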
2206    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2207    the figure below we depict these three local rows and all columns (0-11).
2208 
2209 .vb
2210            0 1 2 3 4 5 6 7 8 9 10 11
2211           --------------------------
2212    row 3  |. . . d d d o o o o  o  o
2213    row 4  |. . . d d d o o o o  o  o
2214    row 5  |. . . d d d o o o o  o  o
2215           --------------------------
2216 .ve
2217 
2218    Thus, any entries in the d locations are stored in the d (diagonal)
2219    submatrix, and any entries in the o locations are stored in the
2220    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2221    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2222 
2223    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2224    plus the diagonal part of the d matrix,
2225    and o_nz should indicate the number of block nonzeros per row in the o matrix
2226 
2227    In general, for PDE problems in which most nonzeros are near the diagonal,
2228    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2229    or you will get TERRIBLE performance; see the users' manual chapter on
2230    matrices.
2231 
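   A sketch of per-block-row preallocation, assuming a hypothetical block-tridiagonal structure
   (each block row needs its diagonal block plus at most one upper off-diagonal neighbor):
.vb
   PetscInt mloc,m,i,*d_nnz,*o_nnz;
   MatGetLocalSize(B,&mloc,NULL);
   m = mloc/bs;                                       /* number of local block rows */
   PetscMalloc2(m,&d_nnz,m,&o_nnz);
   for (i=0; i<m; i++) {d_nnz[i] = 2; o_nnz[i] = 1;}  /* conservative upper bounds */
   MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
   PetscFree2(d_nnz,o_nnz);
.ve
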
2232    Level: intermediate
2233 
2234 .keywords: matrix, block, aij, compressed row, sparse, parallel
2235 
2236 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2237 @*/
2238 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2239 {
2240   PetscErrorCode ierr;
2241 
2242   PetscFunctionBegin;
2243   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2244   PetscValidType(B,1);
2245   PetscValidLogicalCollectiveInt(B,bs,2);
2246   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #undef __FUNCT__
2251 #define __FUNCT__ "MatCreateSBAIJ"
2252 /*@C
2253    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2254    (block compressed row).  For good matrix assembly performance
2255    the user should preallocate the matrix storage by setting the parameters
2256    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2257    performance can be increased by more than a factor of 50.
2258 
2259    Collective on MPI_Comm
2260 
2261    Input Parameters:
2262 +  comm - MPI communicator
2263 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2264           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2265 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2266            This value should be the same as the local size used in creating the
2267            y vector for the matrix-vector product y = Ax.
2268 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2269            This value should be the same as the local size used in creating the
2270            x vector for the matrix-vector product y = Ax.
2271 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2272 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2273 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2274            submatrix  (same for all local rows)
2275 .  d_nnz - array containing the number of block nonzeros in the various block rows
2276            in the upper triangular portion of the diagonal portion of the local submatrix
2277            (possibly different for each block row) or NULL.
2278            If you plan to factor the matrix you must leave room for the diagonal entry and
2279            set its value even if it is zero.
2280 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2281            submatrix (same for all local rows).
2282 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2283            off-diagonal portion of the local submatrix (possibly different for
2284            each block row) or NULL.
2285 
2286    Output Parameter:
2287 .  A - the matrix
2288 
2289    Options Database Keys:
2290 .   -mat_no_unroll - uses code that does not unroll the loops in the
2291                      block calculations (much slower)
2292 .   -mat_block_size - size of the blocks to use
2293 .   -mat_mpi - use the parallel matrix data structures even on one processor
2294                (defaults to using SeqSBAIJ format on one processor)
2295 
2296    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2297    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2298    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2299 
2300    Notes:
2301    The number of rows and columns must be divisible by blocksize.
2302    This matrix type does not support complex Hermitian operation.
2303 
2304    The user MUST specify either the local or global matrix dimensions
2305    (possibly both).
2306 
2307    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2308    then it must be used on all processors that share the object for that argument.
2309 
2310    If the *_nnz parameter is given then the *_nz parameter is ignored
2311 
2312    Storage Information:
2313    For a square global matrix we define each processor's diagonal portion
2314    to be its local rows and the corresponding columns (a square submatrix);
2315    each processor's off-diagonal portion encompasses the remainder of the
2316    local matrix (a rectangular submatrix).
2317 
2318    The user can specify preallocated storage for the diagonal part of
2319    the local submatrix with either d_nz or d_nnz (not both).  Set
2320    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2321    memory allocation.  Likewise, specify preallocated storage for the
2322    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2323 
2324    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2325    the figure below we depict these three local rows and all columns (0-11).
2326 
2327 .vb
2328            0 1 2 3 4 5 6 7 8 9 10 11
2329           --------------------------
2330    row 3  |. . . d d d o o o o  o  o
2331    row 4  |. . . d d d o o o o  o  o
2332    row 5  |. . . d d d o o o o  o  o
2333           --------------------------
2334 .ve
2335 
2336    Thus, any entries in the d locations are stored in the d (diagonal)
2337    submatrix, and any entries in the o locations are stored in the
2338    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2339    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2340 
2341    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2342    plus the diagonal part of the d matrix,
2343    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2344    In general, for PDE problems in which most nonzeros are near the diagonal,
2345    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2346    or you will get TERRIBLE performance; see the users' manual chapter on
2347    matrices.
2348 
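   Example (a sketch with illustrative sizes, bs=2 and a 100x100 global matrix; error checking omitted):
.vb
   Mat A;
   MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,3,NULL,1,NULL,&A);
   /* set entries in the upper triangular part, then assemble */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
   MatDestroy(&A);
.ve
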
2349    Level: intermediate
2350 
2351 .keywords: matrix, block, aij, compressed row, sparse, parallel
2352 
2353 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2354 @*/
2355 
2356 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2357 {
2358   PetscErrorCode ierr;
2359   PetscMPIInt    size;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2363   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2364   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2365   if (size > 1) {
2366     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2367     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2368   } else {
2369     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2370     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2371   }
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2378 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2379 {
2380   Mat            mat;
2381   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2382   PetscErrorCode ierr;
2383   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2384   PetscScalar    *array;
2385 
2386   PetscFunctionBegin;
2387   *newmat = 0;
2388 
2389   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2390   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2391   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2392   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2393   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2394   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2395 
2396   mat->factortype   = matin->factortype;
2397   mat->preallocated = PETSC_TRUE;
2398   mat->assembled    = PETSC_TRUE;
2399   mat->insertmode   = NOT_SET_VALUES;
2400 
2401   a      = (Mat_MPISBAIJ*)mat->data;
2402   a->bs2 = oldmat->bs2;
2403   a->mbs = oldmat->mbs;
2404   a->nbs = oldmat->nbs;
2405   a->Mbs = oldmat->Mbs;
2406   a->Nbs = oldmat->Nbs;
2407 
2408 
2409   a->size         = oldmat->size;
2410   a->rank         = oldmat->rank;
2411   a->donotstash   = oldmat->donotstash;
2412   a->roworiented  = oldmat->roworiented;
2413   a->rowindices   = 0;
2414   a->rowvalues    = 0;
2415   a->getrowactive = PETSC_FALSE;
2416   a->barray       = 0;
2417   a->rstartbs     = oldmat->rstartbs;
2418   a->rendbs       = oldmat->rendbs;
2419   a->cstartbs     = oldmat->cstartbs;
2420   a->cendbs       = oldmat->cendbs;
2421 
2422   /* hash table stuff */
2423   a->ht           = 0;
2424   a->hd           = 0;
2425   a->ht_size      = 0;
2426   a->ht_flag      = oldmat->ht_flag;
2427   a->ht_fact      = oldmat->ht_fact;
2428   a->ht_total_ct  = 0;
2429   a->ht_insert_ct = 0;
2430 
2431   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2432   if (oldmat->colmap) {
2433 #if defined(PETSC_USE_CTABLE)
2434     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2435 #else
2436     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2437     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2438     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2439 #endif
2440   } else a->colmap = 0;
2441 
2442   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2443     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2444     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2445     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2446   } else a->garray = 0;
2447 
2448   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2449   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2450   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2451   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2452   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2453 
2454   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2455   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2456   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2457   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2458 
2459   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2460   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2461   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2462   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2464   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2465   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2466   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2467   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2468   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2469   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2470   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2471   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2472 
2473   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2474   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2475   a->sMvctx = oldmat->sMvctx;
2476   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2477 
2478   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2479   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2480   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2481   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2482   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2483   *newmat = mat;
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatLoad_MPISBAIJ"
2489 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2490 {
2491   PetscErrorCode ierr;
2492   PetscInt       i,nz,j,rstart,rend;
2493   PetscScalar    *vals,*buf;
2494   MPI_Comm       comm;
2495   MPI_Status     status;
2496   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2497   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2498   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2499   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2500   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2501   PetscInt       dcount,kmax,k,nzcount,tmp;
2502   int            fd;
2503 
2504   PetscFunctionBegin;
2505   /* force binary viewer to load .info file if it has not yet done so */
2506   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2507   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2508   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2509   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2510   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2511   if (bs < 0) bs = 1;
2512 
2513   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2514   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2515   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2516   if (!rank) {
2517     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2518     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2519     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2520   }
2521 
2522   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2523   M    = header[1];
2524   N    = header[2];
2525 
2526   /* If global sizes are set, check if they are consistent with that given in the file */
2527   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2528   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2529 
2530   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2531 
2532   /*
2533      This code adds extra rows to make sure the number of rows is
2534      divisible by the blocksize
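     (e.g., M = 10 with bs = 4 gives extra_rows = 2 and Mbs = 3, so the matrix is padded to 12 rows)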
2535   */
2536   Mbs        = M/bs;
2537   extra_rows = bs - M + bs*(Mbs);
2538   if (extra_rows == bs) extra_rows = 0;
2539   else                  Mbs++;
2540   if (extra_rows && !rank) {
2541     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2542   }
2543 
2544   /* determine ownership of all rows */
2545   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2546     mbs = Mbs/size + ((Mbs % size) > rank);
2547     m   = mbs*bs;
2548   } else { /* User Set */
2549     m   = newmat->rmap->n;
2550     mbs = m/bs;
2551   }
2552   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2553   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2554   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2555   rowners[0] = 0;
2556   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2557   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2558   rstart = rowners[rank];
2559   rend   = rowners[rank+1];
2560 
2561   /* distribute row lengths to all processors */
2562   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2563   if (!rank) {
2564     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2565     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2566     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2567     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2568     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2569     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2570     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2571   } else {
2572     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2573   }
2574 
2575   if (!rank) {   /* procs[0] */
2576     /* calculate the number of nonzeros on each processor */
2577     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2578     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2579     for (i=0; i<size; i++) {
2580       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2581         procsnz[i] += rowlengths[j];
2582       }
2583     }
2584     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2585 
2586     /* determine max buffer needed and allocate it */
2587     maxnz = 0;
2588     for (i=0; i<size; i++) {
2589       maxnz = PetscMax(maxnz,procsnz[i]);
2590     }
2591     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2592 
2593     /* read in my part of the matrix column indices  */
2594     nz     = procsnz[0];
2595     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2596     mycols = ibuf;
2597     if (size == 1) nz -= extra_rows;
2598     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2599     if (size == 1) {
2600       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2601     }
2602 
2603     /* read in the other processors' parts (except the last proc's) and ship them off */
2604     for (i=1; i<size-1; i++) {
2605       nz   = procsnz[i];
2606       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2607       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2608     }
2609     /* read in the part for the last proc */
2610     if (size != 1) {
2611       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2612       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2613       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2614       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2615     }
2616     ierr = PetscFree(cols);CHKERRQ(ierr);
2617   } else {  /* procs[i], i>0 */
2618     /* determine buffer space needed for message */
2619     nz = 0;
2620     for (i=0; i<m; i++) nz += locrowlens[i];
2621     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2622     mycols = ibuf;
2623     /* receive message of column indices */
2624     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2625     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2626     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Unexpected message length; something is wrong with the file");
2627   }
2628 
2629   /* loop over local rows, determining the number of off-diagonal entries */
2630   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2631   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2632   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2633   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2634   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2635   rowcount = 0;
2636   nzcount  = 0;
2637   for (i=0; i<mbs; i++) {
2638     dcount  = 0;
2639     odcount = 0;
2640     for (j=0; j<bs; j++) {
2641       kmax = locrowlens[rowcount];
2642       for (k=0; k<kmax; k++) {
2643         tmp = mycols[nzcount++]/bs; /* block col. index */
2644         if (!mask[tmp]) {
2645           mask[tmp] = 1;
2646           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2647           else masked1[dcount++] = tmp; /* entry in diag portion */
2648         }
2649       }
2650       rowcount++;
2651     }
2652 
2653     dlens[i]  = dcount;  /* d_nzz[i] */
2654     odlens[i] = odcount; /* o_nzz[i] */
2655 
2656     /* zero out the mask elements we set */
2657     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2658     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2659   }
2660   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2661   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2662   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2663 
2664   if (!rank) {
2665     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2666     /* read in my part of the matrix numerical values  */
2667     nz     = procsnz[0];
2668     vals   = buf;
2669     mycols = ibuf;
2670     if (size == 1) nz -= extra_rows;
2671     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2672     if (size == 1) {
2673       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2674     }
2675 
2676     /* insert into matrix */
2677     jj = rstart*bs;
2678     for (i=0; i<m; i++) {
2679       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2680       mycols += locrowlens[i];
2681       vals   += locrowlens[i];
2682       jj++;
2683     }
2684 
2685     /* read in the other processors' parts (except the last one's) and ship them out */
2686     for (i=1; i<size-1; i++) {
2687       nz   = procsnz[i];
2688       vals = buf;
2689       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2690       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2691     }
2692     /* the last proc */
2693     if (size != 1) {
2694       nz   = procsnz[size-1] - extra_rows;  /* i == size-1 after the loop above; the extra rows are not on the disk */
2695       vals = buf;
2696       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2697       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2698       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2699     }
2700     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2701 
2702   } else {
2703     /* receive numeric values */
2704     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2705 
2706     /* receive message of values */
2707     vals   = buf;
2708     mycols = ibuf;
2709     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2710     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2711     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Unexpected message length; something is wrong with the file");
2712 
2713     /* insert into matrix */
2714     jj = rstart*bs;
2715     for (i=0; i<m; i++) {
2716       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2717       mycols += locrowlens[i];
2718       vals   += locrowlens[i];
2719       jj++;
2720     }
2721   }
2722 
2723   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2724   ierr = PetscFree(buf);CHKERRQ(ierr);
2725   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2726   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2727   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2728   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2729   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2730   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 #undef __FUNCT__
2735 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2736 /*XXXXX@
2737    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2738 
2739    Not Collective on Mat; each process can have a different hash factor
2740 
2741    Input Parameters:
2742 +  mat  - the matrix
2743 -  fact - factor
2744 
2745    Level: advanced
2746 
2747   Notes:
2748    This can also be set with the command-line option -mat_use_hash_table <fact>
2749 
2750 .keywords: matrix, hashtable, factor, HT
2751 
2752 .seealso: MatSetOption()
2753 @XXXXX*/
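/* Example usage (a sketch: MAT_USE_HASH_TABLE is the MatOption that enables the
   hash table, and the factor 1.99 below is illustrative, not a documented
   default):

     ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);

   or, equivalently, from the command line:

     -mat_use_hash_table 1.99
*/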
2754 
2755 
2756 #undef __FUNCT__
2757 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2758 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2759 {
2760   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2761   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2762   PetscReal      atmp;
2763   PetscReal      *work,*svalues,*rvalues;
2764   PetscErrorCode ierr;
2765   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2766   PetscMPIInt    rank,size;
2767   PetscInt       *rowners_bs,dest,count,source;
2768   PetscScalar    *va;
2769   MatScalar      *ba;
2770   MPI_Status     stat;
2771 
2772   PetscFunctionBegin;
2773   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Returning row indices (idx) is not supported; send email to petsc-maint@mcs.anl.gov");
2774   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2775   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2776 
2777   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2778   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2779 
2780   bs  = A->rmap->bs;
2781   mbs = a->mbs;
2782   Mbs = a->Mbs;
2783   ba  = b->a;
2784   bi  = b->i;
2785   bj  = b->j;
2786 
2787   /* find ownerships */
2788   rowners_bs = A->rmap->range;
2789 
2790   /* each proc creates an array to be distributed */
2791   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2792   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
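  /* Only the upper triangle is stored locally, so an entry of B in (global)
     column col also contributes to the row maximum of row col, which is owned
     by a later rank; work[] accumulates those contributions for sending. */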
2793 
2794   /* row_max for B */
2795   if (rank != size-1) {
2796     for (i=0; i<mbs; i++) {
2797       ncols = bi[1] - bi[0]; bi++;
2798       brow  = bs*i;
2799       for (j=0; j<ncols; j++) {
2800         bcol = bs*(*bj);
2801         for (kcol=0; kcol<bs; kcol++) {
2802           col  = bcol + kcol;                /* local col index */
2803           col += rowners_bs[rank+1];      /* global col index */
2804           for (krow=0; krow<bs; krow++) {
2805             atmp = PetscAbsScalar(*ba); ba++;
2806             row  = brow + krow;   /* local row index */
2807             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2808             if (work[col] < atmp) work[col] = atmp;
2809           }
2810         }
2811         bj++;
2812       }
2813     }
2814 
2815     /* send values to their owners */
2816     for (dest=rank+1; dest<size; dest++) {
2817       svalues = work + rowners_bs[dest];
2818       count   = rowners_bs[dest+1]-rowners_bs[dest];
2819       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2820     }
2821   }
2822 
2823   /* receive values */
2824   if (rank) {
2825     rvalues = work;
2826     count   = rowners_bs[rank+1]-rowners_bs[rank];
2827     for (source=0; source<rank; source++) {
2828       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2829       /* process values */
2830       for (i=0; i<count; i++) {
2831         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2832       }
2833     }
2834   }
2835 
2836   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2837   ierr = PetscFree(work);CHKERRQ(ierr);
2838   PetscFunctionReturn(0);
2839 }
2840 
2841 #undef __FUNCT__
2842 #define __FUNCT__ "MatSOR_MPISBAIJ"
2843 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2844 {
2845   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2846   PetscErrorCode    ierr;
2847   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2848   PetscScalar       *x,*ptr,*from;
2849   Vec               bb1;
2850   const PetscScalar *b;
2851 
2852   PetscFunctionBegin;
2853   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2854   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2855 
2856   if (flag == SOR_APPLY_UPPER) {
2857     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2858     PetscFunctionReturn(0);
2859   }
2860 
2861   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2862     if (flag & SOR_ZERO_INITIAL_GUESS) {
2863       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2864       its--;
2865     }
2866 
2867     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2868     while (its--) {
2869 
2870       /* lower triangular part: slvec0b = B^T*xx (negated below, together with slvec0a) */
2871       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2872 
2873       /* copy xx into slvec0a */
2874       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2875       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2876       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2877       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2878 
2879       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2880 
2881       /* copy bb into slvec1a */
2882       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2883       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2884       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2885       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2886 
2887       /* set slvec1b = 0 */
2888       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2889 
2890       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2891       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2892       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2893       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2894 
2895       /* upper triangular part: bb1 = slvec1a + B*slvec1b, i.e. bb - B^T*x - B*x, since slvec1b holds the scattered -x */
2896       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2897 
2898       /* local diagonal sweep */
2899       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2900     }
2901     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2902   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2903     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2904   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2905     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2906   } else if (flag & SOR_EISENSTAT) {
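    /* Eisenstat's trick: folding the diagonal scaling into the forward and
       backward sweeps gives the SSOR-preconditioned operator at roughly the
       cost of the two triangular sweeps alone. */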
2907     Vec               xx1;
2908     PetscBool         hasop;
2909     const PetscScalar *diag;
2910     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2911     PetscInt          i,n;
2912 
2913     if (!mat->xx1) {
2914       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2915       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2916     }
2917     xx1 = mat->xx1;
2918     bb1 = mat->bb1;
2919 
2920     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2921 
2922     if (!mat->diag) {
2923       /* note: this cached diagonal is stale if the same matrix is given new nonzero values */
2924       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2925       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2926     }
2927     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2928 
2929     if (hasop) {
2930       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2931       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2932     } else {
2933       /*
2934           These two lines are replaced by code that may be a bit faster for a good compiler
2935       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2936       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2937       */
2938       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2939       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2940       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2941       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2942       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
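      /* with scale = (omega-2)/omega, omega == 1 gives scale = -1, so the
         first branch below is the same update with one fewer multiply per entry */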
2943       if (omega == 1.0) {
2944         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2945         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2946       } else {
2947         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2948         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2949       }
2950       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2951       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2952       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2953       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2954     }
2955 
2956     /* multiply off-diagonal portion of matrix */
2957     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2958     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2959     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2960     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2961     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2962     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2963     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2964     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2965     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2966     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2967 
2968     /* local sweep */
2969     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2970     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2971   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2972   PetscFunctionReturn(0);
2973 }
2974 
2975 #undef __FUNCT__
2976 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2977 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2978 {
2979   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2980   PetscErrorCode ierr;
2981   Vec            lvec1,bb1;
2982 
2983   PetscFunctionBegin;
2984   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2985   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2986 
2987   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2988     if (flag & SOR_ZERO_INITIAL_GUESS) {
2989       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2990       its--;
2991     }
2992 
2993     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2994     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2995     while (its--) {
2996       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2997 
2998       /* lower diagonal part: bb1 = bb - B^T*xx */
2999       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3000       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3001 
3002       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3003       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3004       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3005 
3006       /* upper diagonal part: bb1 = bb1 - B*x */
3007       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3008       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3009 
3010       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3011 
3012       /* diagonal sweep */
3013       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3014     }
3015     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3016     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3017   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3018   PetscFunctionReturn(0);
3019 }
3020 
3021 #undef __FUNCT__
3022 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
3023 /*@
3024      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
3025          in standard CSR format.
3026 
3027    Collective on MPI_Comm
3028 
3029    Input Parameters:
3030 +  comm - MPI communicator
3031 .  bs - the block size, only a block size of 1 is supported
3032 .  m - number of local rows (Cannot be PETSC_DECIDE)
3033 .  n - This value should be the same as the local size used in creating the
3034        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3035        calculated if N is given). For square matrices n is almost always m.
3036 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3037 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3038 .   i - row indices
3039 .   j - column indices
3040 -   a - matrix values
3041 
3042    Output Parameter:
3043 .   mat - the matrix
3044 
3045    Level: intermediate
3046 
3047    Notes:
3048        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3049      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3050      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3051 
3052        The i and j indices are 0 based; the entries of i are offsets into the local j array.
3053 
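       Example (a minimal sketch, assuming the arrays i, j, and a already hold
     the local rows in CSR form, with only the upper triangular part of each
     row supplied as is usual for SBAIJ):
.vb
      Mat A;
      ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,m,m,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve
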
3054 .keywords: matrix, aij, compressed row, sparse, parallel
3055 
3056 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3057           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3058 @*/
3059 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3060 {
3061   PetscErrorCode ierr;
3062 
3063 
3064   PetscFunctionBegin;
3065   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3066   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
3067   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3068   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3069   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3070   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3071   PetscFunctionReturn(0);
3072 }
3073 
3074 
3075 #undef __FUNCT__
3076 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
3077 /*@C
3078    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format,
3079    given the nonzero structure of the local rows in CSR form.
3080 
3081    Collective on MPI_Comm
3082 
3083    Input Parameters:
3084 +  B - the matrix
3085 .  bs - the block size
3086 .  i - the indices into j for the start of each local row (starts with zero)
3087 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3088 -  v - optional values in the matrix
3089 
3090    Level: developer
3091 
3092 .keywords: matrix, aij, compressed row, sparse, parallel
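   Example (illustrative, with bs = 1 on a single process; it is assumed here,
   as is usual for SBAIJ, that only the upper triangular part of each row is
   supplied): the symmetric matrix
.vb
        1 2 0
        2 3 4
        0 4 5
.ve
   is described by
.vb
        i = {0,2,4,5},  j = {0,1,1,2,2},  v = {1,2,3,4,5}
.ve
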
3093 
3094 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3095 @*/
3096 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3097 {
3098   PetscErrorCode ierr;
3099 
3100   PetscFunctionBegin;
3101   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3102   PetscFunctionReturn(0);
3103 }
3104 
3105 #undef __FUNCT__
3106 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
3107 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3108 {
3109   PetscErrorCode ierr;
3110   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3111   PetscInt       *indx;
3112   PetscScalar    *values;
3113 
3114   PetscFunctionBegin;
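  /* Stack the per-rank sequential SBAIJ matrices row-wise into one parallel
     MPISBAIJ matrix: a symbolic phase (MAT_INITIAL_MATRIX) computes the block
     preallocation, then a numeric phase copies the values row by row. */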
3115   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3116   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3117     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3118     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3119     PetscInt       *bindx,rmax=a->rmax,j;
3120 
3121     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3122     mbs = m/bs; Nbs = N/cbs;
3123     if (n == PETSC_DECIDE) {
3124       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3125     }
3126     /* Check sum(n) = Nbs */
3127     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3128     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);
3129 
3130     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3131     rstart -= mbs;
3132 
3133     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3134     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3135     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3136     for (i=0; i<mbs; i++) {
3137       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3138       nnz = nnz/bs;
3139       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3140       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3141       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3142     }
3143     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3144     ierr = PetscFree(bindx);CHKERRQ(ierr);
3145 
3146     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3147     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3148     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3149     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3150     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3151     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3152   }
3153 
3154   /* numeric phase */
3155   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3156   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3157 
3158   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3159   for (i=0; i<m; i++) {
3160     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3161     Ii   = i + rstart;
3162     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3163     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3164   }
3165   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3166   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3167   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3168   PetscFunctionReturn(0);
3169 }
3170