/* xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 09f3b4e5628a00a1eaf17d80982cfbcc515cc9c1) */
#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single-precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries to single precision and then calls ..._MatScalar() to put them
   into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif
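
/*
   A minimal sketch of the wrapper pattern described above (illustrative only;
   mat/m/im/n/in/addv stand for the usual MatSetValues() arguments). When
   PETSC_USE_MAT_SINGLE is defined, the public routine copies the
   double-precision input into a MatScalar (single-precision) buffer and then
   forwards to the ..._MatScalar() variant:

     PetscScalar vdouble[] = {1.0,2.0};            hypothetical input values
     MatScalar   vsingle[2];
     for (i=0; i<2; i++) vsingle[i] = vdouble[i];  narrowing conversion
     ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);

   Without PETSC_USE_MAT_SINGLE the #define block above makes the two names
   aliases, so no copy is performed.
*/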

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
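
/*
   Usage sketch for the store/retrieve pair above (a hedged example, not code
   from this file): an application that refills the same nonzero pattern many
   times can save and later restore the numerical values.

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);      save the current values
     ...  overwrite the matrix entries ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   restore the saved values
*/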


#define CHUNKSIZE  10

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqSBAIJ_B_Private
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif
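
/*
   Reading aid for the two macros above (a worked example, not code): within
   block row brow the block column indices rp[] are kept sorted, so the macros
   first narrow the search window by bisection and then scan linearly. A
   bs x bs block _i is stored column-major, so entry (ridx,cidx) of that block
   lives at

       ap[bs2*_i + bs*cidx + ridx]

   For example, with bs = 2 the global entry (row,col) = (3,5) maps to block
   (brow,bcol) = (1,2) with intra-block offset (ridx,cidx) = (1,1), i.e. to
   ap[4*_i + 2*1 + 1] once block column 2 is found at position _i.
*/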

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/* Only add/insert a(i,j) with i<=j (block indices).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some variables required in the macros */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc=0;
  MatScalar      *v_loc=0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs){
256           if (a->ignore_ltriangular){
257             continue;    /* ignore lower triangular blocks */
258           } else {
259             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
260           }
261         }
262         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
263           col = in[j] - cstart_orig;          /* local col index */
264           brow = row/bs; bcol = col/bs;
265           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
266           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
267           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
268           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
269         } else if (in[j] < 0) continue;
270 #if defined(PETSC_USE_DEBUG)
271         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
272 #endif
273         else {  /* off-diag entry (B) */
274           if (mat->was_assembled) {
275             if (!baij->colmap) {
276               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
277             }
278 #if defined (PETSC_USE_CTABLE)
279             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
280             col  = col - 1;
281 #else
282             col = baij->colmap[in[j]/bs] - 1;
283 #endif
284             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
285               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
286               col =  in[j];
287               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
288               B = baij->B;
289               b = (Mat_SeqBAIJ*)(B)->data;
290               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
291               ba=b->a;
292             } else col += in[j]%bs;
293           } else col = in[j];
294           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
295           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
296           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
297         }
298       }
299     } else {  /* off processor entry */
300       if (!baij->donotstash) {
301         n_loc = 0;
302         for (j=0; j<n; j++){
303           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
304           in_loc[n_loc] = in[j];
305           if (roworiented) {
306             v_loc[n_loc] = v[i*n+j];
307           } else {
308             v_loc[n_loc] = v[j*m+i];
309           }
310           n_loc++;
311         }
312         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
313       }
314     }
315   }
316 
317   if(!baij->donotstash){
318     ierr = PetscFree(in_loc);CHKERRQ(ierr);
319     ierr = PetscFree(v_loc);CHKERRQ(ierr);
320   }
321   PetscFunctionReturn(0);
322 }
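
/*
   Caller-side sketch (hypothetical indices and values): for the symmetric
   SBAIJ format only entries with block row <= block column may be inserted.
   Lower-triangular entries either raise an error or, once the option below
   is set, are silently dropped, as enforced in the routine above.

     PetscInt    row = 0, cols[2] = {0,1};
     PetscScalar vals[2] = {4.0,-1.0};
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
*/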

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block column (row-oriented) or one block row
           (column-oriented), the input block can be used in place */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
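
/*
   A blocked-insertion sketch (hypothetical bs = 2 data): MatSetValuesBlocked()
   addresses blocks by block row/column, and the values array carries
   m*n*bs2 scalars, row-oriented by default, as unpacked by the routine above.

     PetscInt    brow = 0, bcol = 1;
     PetscScalar block[4] = {1.0,2.0,
                             3.0,4.0};   one 2x2 block
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
*/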

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
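
/*
   Note on scope (with a small sketch): MatGetValues() on an MPISBAIJ matrix
   can only return entries from rows owned by the calling process, as the
   error branch above enforces.

     PetscInt    row, col;    hypothetical locally owned global indices
     PetscScalar val;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/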

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
      PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
      rsum2 = rsum + mat->N;
      ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree(rsum);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
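
/*
   Why the factor of 2 in the Frobenius branch above: only the upper triangle
   is stored, so each entry of the off-process part B also represents its
   (unstored) transpose and must be counted twice.  Summing the local squared
   norms over all processes gives

       norm_F(M) = sqrt( sum sq(norm_F(A_loc)) + 2 * sum sq(norm_F(B_loc)) )

   which is exactly the line  *norm = sqrt(sum[0] + 2*sum[1])  computed above.
*/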

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscVerboseInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscVerboseInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to column-oriented, and restore the original
       flags after the MatSetValues() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, so that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}
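
/*
   Assembly sketch from the caller's side (illustrative): values set with
   MatSetValues()/MatSetValuesBlocked() are only guaranteed to be in place
   after the begin/end pair below, which also drains the stashed off-process
   entries handled above.

     ... many MatSetValues() calls, possibly for off-process rows ...
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/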

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
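
/*
   What the routine above computes, in matrix terms: with only the upper
   triangle stored, the local pieces are the diagonal block A and the
   strictly upper off-process block B, and

       y = A*x_loc + B*x_ghost + B^T*x_loc

   The B^T*x_loc contributions are produced locally into slvec0b and shipped
   to their owners by the single scatter on sMvctx, which simultaneously
   brings in the ghost values x_ghost needed for the B*x_ghost term.
*/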

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  PetscInt       *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B are already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
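
/*
   Row-traversal sketch (illustrative; row must be locally owned, see
   MatGetRow_MPISBAIJ() above): for the symmetric format, bracket the access
   with the upper-triangular pair defined below.

     PetscInt    nz,*cols;
     PetscScalar *vals;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ... examine the nz entries in cols[] / vals[] ...
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/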

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
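
/*
   Query sketch (illustrative): MAT_LOCAL returns this process's combined
   A + B counts; MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce those over the
   communicator as in the branches above.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);
*/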

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscVerboseInfo((A,"MatSetOption_MPISBAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
    SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = PETSC_TRUE;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = PETSC_FALSE;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = PETSC_TRUE;
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
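
/*
   Option sketch (illustrative): the options most specific to this format are
   the lower-triangular handling switches dispatched above.

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR);CHKERRQ(ierr);   drop a(i,j), i>j
     ierr = MatSetOption(mat,MAT_ERROR_LOWER_TRIANGULAR);CHKERRQ(ierr);    error on a(i,j), i>j
     ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);   skip stashing
*/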

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr){
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg)
      SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");

  ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);

  /* left diagonal-scale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonal-scale the off-diagonal part */
  ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
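
/*
   Scaling sketch (illustrative): because the matrix is stored symmetrically,
   only the symmetric scaling D*A*D is supported, so the same vector must be
   passed on both sides, as checked above.

     Vec d;    diagonal scaling factors, same layout as the matrix rows
     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);   computes D*A*D
*/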

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPISBAIJ"
PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  MPI_Comm          comm = A->comm;
  static PetscTruth called = PETSC_FALSE;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use a hash table for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1376 
1377 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatEqual_MPISBAIJ"
1381 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1382 {
1383   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1384   Mat            a,b,c,d;
1385   PetscTruth     flg;
1386   PetscErrorCode ierr;
1387 
1388   PetscFunctionBegin;
1389   a = matA->A; b = matA->B;
1390   c = matB->A; d = matB->B;
1391 
1392   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1393   if (flg) {
1394     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1395   }
1396   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatCopy_MPISBAIJ"
1402 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1403 {
1404   PetscErrorCode ierr;
1405   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1406   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1407 
1408   PetscFunctionBegin;
1409   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1410   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1411     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1412     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1413     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1414   } else {
1415     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1416     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1423 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1424 {
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #include "petscblaslapack.h"
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1435 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1436 {
1437   PetscErrorCode ierr;
1438   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1439   PetscBLASInt   bnz,one=1;
1440   Mat_SeqSBAIJ   *xa,*ya;
1441   Mat_SeqBAIJ    *xb,*yb;
1442 
1443   PetscFunctionBegin;
1444   if (str == SAME_NONZERO_PATTERN) {
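    /* identical nonzero patterns: Y += a*X reduces to a BLAS axpy over the raw
       stored values of the diagonal (SBAIJ) and off-diagonal (BAIJ) parts */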
1445     PetscScalar alpha = a;
1446     xa = (Mat_SeqSBAIJ *)xx->A->data;
1447     ya = (Mat_SeqSBAIJ *)yy->A->data;
1448     bnz = (PetscBLASInt)xa->nz;
1449     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1450     xb = (Mat_SeqBAIJ *)xx->B->data;
1451     yb = (Mat_SeqBAIJ *)yy->B->data;
1452     bnz = (PetscBLASInt)xb->nz;
1453     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1454   } else {
1455     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1456     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1457     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1458   }
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 #undef __FUNCT__
1463 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1464 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1465 {
1466   PetscErrorCode ierr;
1467   PetscInt       i;
1468   PetscTruth     flg;
1469 
1470   PetscFunctionBegin;
1471   for (i=0; i<n; i++) {
1472     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1473     if (!flg) {
1474       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1475     }
1476   }
1477   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 
1482 /* -------------------------------------------------------------------*/
1483 static struct _MatOps MatOps_Values = {
1484        MatSetValues_MPISBAIJ,
1485        MatGetRow_MPISBAIJ,
1486        MatRestoreRow_MPISBAIJ,
1487        MatMult_MPISBAIJ,
1488 /* 4*/ MatMultAdd_MPISBAIJ,
1489        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones because the matrix is symmetric */
1490        MatMultAdd_MPISBAIJ,
1491        0,
1492        0,
1493        0,
1494 /*10*/ 0,
1495        0,
1496        0,
1497        MatRelax_MPISBAIJ,
1498        MatTranspose_MPISBAIJ,
1499 /*15*/ MatGetInfo_MPISBAIJ,
1500        MatEqual_MPISBAIJ,
1501        MatGetDiagonal_MPISBAIJ,
1502        MatDiagonalScale_MPISBAIJ,
1503        MatNorm_MPISBAIJ,
1504 /*20*/ MatAssemblyBegin_MPISBAIJ,
1505        MatAssemblyEnd_MPISBAIJ,
1506        0,
1507        MatSetOption_MPISBAIJ,
1508        MatZeroEntries_MPISBAIJ,
1509 /*25*/ 0,
1510        0,
1511        0,
1512        0,
1513        0,
1514 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1515        0,
1516        0,
1517        0,
1518        0,
1519 /*35*/ MatDuplicate_MPISBAIJ,
1520        0,
1521        0,
1522        0,
1523        0,
1524 /*40*/ MatAXPY_MPISBAIJ,
1525        MatGetSubMatrices_MPISBAIJ,
1526        MatIncreaseOverlap_MPISBAIJ,
1527        MatGetValues_MPISBAIJ,
1528        MatCopy_MPISBAIJ,
1529 /*45*/ MatPrintHelp_MPISBAIJ,
1530        MatScale_MPISBAIJ,
1531        0,
1532        0,
1533        0,
1534 /*50*/ 0,
1535        0,
1536        0,
1537        0,
1538        0,
1539 /*55*/ 0,
1540        0,
1541        MatSetUnfactored_MPISBAIJ,
1542        0,
1543        MatSetValuesBlocked_MPISBAIJ,
1544 /*60*/ 0,
1545        0,
1546        0,
1547        MatGetPetscMaps_Petsc,
1548        0,
1549 /*65*/ 0,
1550        0,
1551        0,
1552        0,
1553        0,
1554 /*70*/ MatGetRowMax_MPISBAIJ,
1555        0,
1556        0,
1557        0,
1558        0,
1559 /*75*/ 0,
1560        0,
1561        0,
1562        0,
1563        0,
1564 /*80*/ 0,
1565        0,
1566        0,
1567        0,
1568        MatLoad_MPISBAIJ,
1569 /*85*/ 0,
1570        0,
1571        0,
1572        0,
1573        0,
1574 /*90*/ 0,
1575        0,
1576        0,
1577        0,
1578        0,
1579 /*95*/ 0,
1580        0,
1581        0,
1582        0,
1583        0,
1584 /*100*/0,
1585        0,
1586        0,
1587        0,
1588        0,
1589 /*105*/0,
1590        MatRealPart_MPISBAIJ,
1591        MatImaginaryPart_MPISBAIJ,
1592        MatGetRowUpperTriangular_MPISBAIJ,
1593        MatRestoreRowUpperTriangular_MPISBAIJ
1594 };
1595 
1596 
1597 EXTERN_C_BEGIN
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1600 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1601 {
1602   PetscFunctionBegin;
1603   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1604   *iscopy = PETSC_FALSE;
1605   PetscFunctionReturn(0);
1606 }
1607 EXTERN_C_END
1608 
1609 EXTERN_C_BEGIN
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1612 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1613 {
1614   Mat_MPISBAIJ   *b;
1615   PetscErrorCode ierr;
1616   PetscInt       i,mbs,Mbs;
1617 
1618   PetscFunctionBegin;
1619   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1620 
1621   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1622   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1623   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1624   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1625   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1626   if (d_nnz) {
1627     for (i=0; i<B->m/bs; i++) {
1628       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1629     }
1630   }
1631   if (o_nnz) {
1632     for (i=0; i<B->m/bs; i++) {
1633       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1634     }
1635   }
1636   B->preallocated = PETSC_TRUE;
1637   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1638   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1639   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1640   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1641 
1642   b   = (Mat_MPISBAIJ*)B->data;
1643   mbs = B->m/bs;
1644   Mbs = B->M/bs;
1645   if (mbs*bs != B->m) {
1646     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1647   }
1648 
1649   B->bs  = bs;
1650   b->bs2 = bs*bs;
1651   b->mbs = mbs;
1652   b->nbs = mbs;
1653   b->Mbs = Mbs;
1654   b->Nbs = Mbs;
1655 
1656   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1657   b->rowners[0]    = 0;
1658   for (i=2; i<=b->size; i++) {
1659     b->rowners[i] += b->rowners[i-1];
1660   }
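  /* rowners[] now holds the prefix sums of the per-process block-row counts:
     process r owns block rows rowners[r] .. rowners[r+1]-1; the *_bs variants
     below carry the same ranges in unblocked (point) indices */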
1661   b->rstart    = b->rowners[b->rank];
1662   b->rend      = b->rowners[b->rank+1];
1663   b->cstart    = b->rstart;
1664   b->cend      = b->rend;
1665   for (i=0; i<=b->size; i++) {
1666     b->rowners_bs[i] = b->rowners[i]*bs;
1667   }
1668   b->rstart_bs = b->rstart*bs;
1669   b->rend_bs   = b->rend*bs;
1670 
1671   b->cstart_bs = b->cstart*bs;
1672   b->cend_bs   = b->cend*bs;
1673 
1674   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1675   ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr);
1676   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1677   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1678   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1679 
1680   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1681   ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr);
1682   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1683   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1684   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1685 
1686   /* build cache for off array entries formed */
1687   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1688 
1689   PetscFunctionReturn(0);
1690 }
1691 EXTERN_C_END
1692 
1693 /*MC
1694    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1695    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1696 
1697    Options Database Keys:
1698 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
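
   Example (a sketch; ./myapp is a placeholder for any application that calls MatSetFromOptions()):
.vb
   mpirun -np 4 ./myapp -mat_type mpisbaij -mat_block_size 2
.ve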
1699 
1700   Level: beginner
1701 
1702 .seealso: MatCreateMPISBAIJ
1703 M*/
1704 
1705 EXTERN_C_BEGIN
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatCreate_MPISBAIJ"
1708 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1709 {
1710   Mat_MPISBAIJ   *b;
1711   PetscErrorCode ierr;
1712   PetscTruth     flg;
1713 
1714   PetscFunctionBegin;
1715 
1716   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1717   B->data = (void*)b;
1718   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1719 
1720   B->ops->destroy    = MatDestroy_MPISBAIJ;
1721   B->ops->view       = MatView_MPISBAIJ;
1722   B->mapping    = 0;
1723   B->factor     = 0;
1724   B->assembled  = PETSC_FALSE;
1725 
1726   B->insertmode = NOT_SET_VALUES;
1727   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1728   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1729 
1730   /* build local table of row and column ownerships */
1731   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1732   b->cowners    = b->rowners + b->size + 2;
1733   b->rowners_bs = b->cowners + b->size + 2;
1734   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1735 
1736   /* build cache for off array entries formed */
1737   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1738   b->donotstash  = PETSC_FALSE;
1739   b->colmap      = PETSC_NULL;
1740   b->garray      = PETSC_NULL;
1741   b->roworiented = PETSC_TRUE;
1742 
1743 #if defined(PETSC_USE_MAT_SINGLE)
1744   /* stuff for MatSetValues_XXX in single precision */
1745   b->setvalueslen     = 0;
1746   b->setvaluescopy    = PETSC_NULL;
1747 #endif
1748 
1749   /* stuff used in block assembly */
1750   b->barray       = 0;
1751 
1752   /* stuff used for matrix vector multiply */
1753   b->lvec         = 0;
1754   b->Mvctx        = 0;
1755   b->slvec0       = 0;
1756   b->slvec0b      = 0;
1757   b->slvec1       = 0;
1758   b->slvec1a      = 0;
1759   b->slvec1b      = 0;
1760   b->sMvctx       = 0;
1761 
1762   /* stuff for MatGetRow() */
1763   b->rowindices   = 0;
1764   b->rowvalues    = 0;
1765   b->getrowactive = PETSC_FALSE;
1766 
1767   /* hash table stuff */
1768   b->ht           = 0;
1769   b->hd           = 0;
1770   b->ht_size      = 0;
1771   b->ht_flag      = PETSC_FALSE;
1772   b->ht_fact      = 0;
1773   b->ht_total_ct  = 0;
1774   b->ht_insert_ct = 0;
1775 
1776   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1777   if (flg) {
1778     PetscReal fact = 1.39;
1779     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1780     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1781     if (fact <= 1.0) fact = 1.39;
1782     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1783     ierr = PetscVerboseInfo((0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact));CHKERRQ(ierr);
1784   }
1785   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1786                                      "MatStoreValues_MPISBAIJ",
1787                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1788   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1789                                      "MatRetrieveValues_MPISBAIJ",
1790                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1791   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1792                                      "MatGetDiagonalBlock_MPISBAIJ",
1793                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1794   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1795                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1796                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1797   B->symmetric                  = PETSC_TRUE;
1798   B->structurally_symmetric     = PETSC_TRUE;
1799   B->symmetric_set              = PETSC_TRUE;
1800   B->structurally_symmetric_set = PETSC_TRUE;
1801   PetscFunctionReturn(0);
1802 }
1803 EXTERN_C_END
1804 
1805 /*MC
1806    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1807 
1808    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1809    and MATMPISBAIJ otherwise.
1810 
1811    Options Database Keys:
1812 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
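
   Example (a sketch; ./myapp is a placeholder for any application that calls MatSetFromOptions()):
.vb
   mpirun -np 2 ./myapp -mat_type sbaij
.ve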
1813 
1814   Level: beginner
1815 
1816 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1817 M*/
1818 
1819 EXTERN_C_BEGIN
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatCreate_SBAIJ"
1822 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1823 {
1824   PetscErrorCode ierr;
1825   PetscMPIInt    size;
1826 
1827   PetscFunctionBegin;
1828   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1829   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1830   if (size == 1) {
1831     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1832   } else {
1833     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1834   }
1835   PetscFunctionReturn(0);
1836 }
1837 EXTERN_C_END
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1841 /*@C
1842    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1843    the user should preallocate the matrix storage by setting the parameters
1844    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1845    performance can be increased by more than a factor of 50.
1846 
1847    Collective on Mat
1848 
1849    Input Parameters:
1850 +  A - the matrix
1851 .  bs   - size of block
1852 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1853            submatrix  (same for all local rows)
1854 .  d_nnz - array containing the number of block nonzeros in the upper triangular and
1855            diagonal part of each block row of the diagonal portion of the local
1856            submatrix (possibly different for each block row), or PETSC_NULL.  You must
1857            leave room for the diagonal entry even if it is zero.
1858 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1859            submatrix (same for all local rows).
1860 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1861            off-diagonal portion of the local submatrix (possibly different for
1862            each block row) or PETSC_NULL.
1863 
1864 
1865    Options Database Keys:
1866 +   -mat_no_unroll - uses code that does not unroll the loops in the
1867                      block calculations (much slower)
1868 -   -mat_block_size - size of the blocks to use
1869 
1870    Notes:
1871 
1872    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1873    then it must be used on all processors that share the object for that argument.
1874 
1875    If the *_nnz parameter is given then the *_nz parameter is ignored
1876 
1877    Storage Information:
1878    For a square global matrix we define each processor's diagonal portion
1879    to be its local rows and the corresponding columns (a square submatrix);
1880    each processor's off-diagonal portion encompasses the remainder of the
1881    local matrix (a rectangular submatrix).
1882 
1883    The user can specify preallocated storage for the diagonal part of
1884    the local submatrix with either d_nz or d_nnz (not both).  Set
1885    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1886    memory allocation.  Likewise, specify preallocated storage for the
1887    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1888 
1889    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1890    the figure below we depict these three local rows and all columns (0-11).
1891 
1892 .vb
1893            0 1 2 3 4 5 6 7 8 9 10 11
1894           -------------------
1895    row 3  |  o o o d d d o o o o o o
1896    row 4  |  o o o d d d o o o o o o
1897    row 5  |  o o o d d d o o o o o o
1898           -------------------
1899 .ve
1900 
1901    Thus, any entries in the d locations are stored in the d (diagonal)
1902    submatrix, and any entries in the o locations are stored in the
1903    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1904    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1905 
1906    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1907    plus the diagonal part of the d matrix,
1908    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1909    In general, for PDE problems in which most nonzeros are near the diagonal,
1910    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1911    or you will get TERRIBLE performance; see the users' manual chapter on
1912    matrices.
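
   Example (a minimal sketch; the sizes and fill estimates below are illustrative
   only, and error checking is omitted):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
   MatSetType(A,MATMPISBAIJ);
   /* bs=2; at most 5 upper triangular block nonzeros per block row in the
      diagonal portion and 2 in the off-diagonal portion */
   MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);
.ve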
1913 
1914    Level: intermediate
1915 
1916 .keywords: matrix, block, aij, compressed row, sparse, parallel
1917 
1918 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1919 @*/
1920 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1921 {
1922   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1923 
1924   PetscFunctionBegin;
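  /* look the type-specific routine up by name: matrix types that do not
     compose MatMPISBAIJSetPreallocation_C make this a no-op, so the call is
     safe on any matrix */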
1925   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1926   if (f) {
1927     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatCreateMPISBAIJ"
1934 /*@C
1935    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1936    (block compressed row).  For good matrix assembly performance
1937    the user should preallocate the matrix storage by setting the parameters
1938    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1939    performance can be increased by more than a factor of 50.
1940 
1941    Collective on MPI_Comm
1942 
1943    Input Parameters:
1944 +  comm - MPI communicator
1945 .  bs   - size of block
1946 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1947            This value should be the same as the local size used in creating the
1948            y vector for the matrix-vector product y = Ax.
1949 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1950            This value should be the same as the local size used in creating the
1951            x vector for the matrix-vector product y = Ax.
1952 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1953 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1954 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1955            submatrix  (same for all local rows)
1956 .  d_nnz - array containing the number of block nonzeros in the upper triangular
1957            portion of each block row of the diagonal portion of the local submatrix
1958            (possibly different for each block row) or PETSC_NULL.
1959            You must leave room for the diagonal entry even if it is zero.
1960 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1961            submatrix (same for all local rows).
1962 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1963            off-diagonal portion of the local submatrix (possibly different for
1964            each block row) or PETSC_NULL.
1965 
1966    Output Parameter:
1967 .  A - the matrix
1968 
1969    Options Database Keys:
1970 +   -mat_no_unroll - uses code that does not unroll the loops in the
1971                      block calculations (much slower)
1972 .   -mat_block_size - size of the blocks to use
1973 -   -mat_mpi - use the parallel matrix data structures even on one processor
1974                (defaults to using SeqBAIJ format on one processor)
1975 
1976    Notes:
1977    The number of rows and columns must be divisible by blocksize.
1978 
1979    The user MUST specify either the local or global matrix dimensions
1980    (possibly both).
1981 
1982    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1983    then it must be used on all processors that share the object for that argument.
1984 
1985    If the *_nnz parameter is given then the *_nz parameter is ignored
1986 
1987    Storage Information:
1988    For a square global matrix we define each processor's diagonal portion
1989    to be its local rows and the corresponding columns (a square submatrix);
1990    each processor's off-diagonal portion encompasses the remainder of the
1991    local matrix (a rectangular submatrix).
1992 
1993    The user can specify preallocated storage for the diagonal part of
1994    the local submatrix with either d_nz or d_nnz (not both).  Set
1995    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1996    memory allocation.  Likewise, specify preallocated storage for the
1997    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1998 
1999    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2000    the figure below we depict these three local rows and all columns (0-11).
2001 
2002 .vb
2003            0 1 2 3 4 5 6 7 8 9 10 11
2004           -------------------
2005    row 3  |  o o o d d d o o o o o o
2006    row 4  |  o o o d d d o o o o o o
2007    row 5  |  o o o d d d o o o o o o
2008           -------------------
2009 .ve
2010 
2011    Thus, any entries in the d locations are stored in the d (diagonal)
2012    submatrix, and any entries in the o locations are stored in the
2013    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2014    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2015 
2016    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2017    plus the diagonal part of the d matrix,
2018    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2019    In general, for PDE problems in which most nonzeros are near the diagonal,
2020    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2021    or you will get TERRIBLE performance; see the users' manual chapter on
2022    matrices.
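
   Example (a minimal sketch; the fill estimates are illustrative and error
   checking is omitted):
.vb
   Mat A;
   MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                     5,PETSC_NULL,2,PETSC_NULL,&A);
.ve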
2023 
2024    Level: intermediate
2025 
2026 .keywords: matrix, block, aij, compressed row, sparse, parallel
2027 
2028 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2029 @*/
2030 
2031 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2032 {
2033   PetscErrorCode ierr;
2034   PetscMPIInt    size;
2035 
2036   PetscFunctionBegin;
2037   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2038   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2039   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2040   if (size > 1) {
2041     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2042     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2043   } else {
2044     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2045     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2046   }
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2053 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2054 {
2055   Mat            mat;
2056   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2057   PetscErrorCode ierr;
2058   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
2059   PetscScalar    *array;
2060 
2061   PetscFunctionBegin;
2062   *newmat       = 0;
2063   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2064   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
2065   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2066   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2067 
2068   mat->factor       = matin->factor;
2069   mat->preallocated = PETSC_TRUE;
2070   mat->assembled    = PETSC_TRUE;
2071   mat->insertmode   = NOT_SET_VALUES;
2072 
2073   a = (Mat_MPISBAIJ*)mat->data;
2074   mat->bs  = matin->bs;
2075   a->bs2   = oldmat->bs2;
2076   a->mbs   = oldmat->mbs;
2077   a->nbs   = oldmat->nbs;
2078   a->Mbs   = oldmat->Mbs;
2079   a->Nbs   = oldmat->Nbs;
2080 
2081   a->rstart       = oldmat->rstart;
2082   a->rend         = oldmat->rend;
2083   a->cstart       = oldmat->cstart;
2084   a->cend         = oldmat->cend;
2085   a->size         = oldmat->size;
2086   a->rank         = oldmat->rank;
2087   a->donotstash   = oldmat->donotstash;
2088   a->roworiented  = oldmat->roworiented;
2089   a->rowindices   = 0;
2090   a->rowvalues    = 0;
2091   a->getrowactive = PETSC_FALSE;
2092   a->barray       = 0;
2093   a->rstart_bs    = oldmat->rstart_bs;
2094   a->rend_bs      = oldmat->rend_bs;
2095   a->cstart_bs    = oldmat->cstart_bs;
2096   a->cend_bs      = oldmat->cend_bs;
2097 
2098   /* hash table stuff */
2099   a->ht           = 0;
2100   a->hd           = 0;
2101   a->ht_size      = 0;
2102   a->ht_flag      = oldmat->ht_flag;
2103   a->ht_fact      = oldmat->ht_fact;
2104   a->ht_total_ct  = 0;
2105   a->ht_insert_ct = 0;
2106 
2107   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2108   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2109   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2110   if (oldmat->colmap) {
2111 #if defined (PETSC_USE_CTABLE)
2112     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2113 #else
2114     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2115     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2116     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2117 #endif
2118   } else a->colmap = 0;
2119 
2120   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2121     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2122     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2123     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2124   } else a->garray = 0;
2125 
2126   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2127   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2128   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2129   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2130 
2131   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2132   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2133   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2134   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
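  /* slvec1 stores the local part (bs*mbs entries) followed by the ghost part;
     slvec1a and slvec1b below are windows onto those two segments of the same
     array, created without copying */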
2135 
2136   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2137   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2138   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2139   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2140   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2141   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2142   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2143   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2144   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2146   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2147   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2148   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2149 
2150   /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet; instead share the original scatter via reference counting: */
2151   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2152   a->sMvctx = oldmat->sMvctx;
2153   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2154 
2155   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2156   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2157   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2158   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2159   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); /* copy the composed-function list from the original to the duplicate */
2160   *newmat = mat;
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 #include "petscsys.h"
2165 
2166 #undef __FUNCT__
2167 #define __FUNCT__ "MatLoad_MPISBAIJ"
2168 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2169 {
2170   Mat            A;
2171   PetscErrorCode ierr;
2172   PetscInt       i,nz,j,rstart,rend;
2173   PetscScalar    *vals,*buf;
2174   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2175   MPI_Status     status;
2176   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2177   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2178   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2179   PetscInt       bs=1,Mbs,mbs,extra_rows;
2180   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2181   PetscInt       dcount,kmax,k,nzcount,tmp;
2182   int            fd;
2183 
2184   PetscFunctionBegin;
2185   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2186 
2187   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2188   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2189   if (!rank) {
2190     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2191     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2192     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2193     if (header[3] < 0) {
2194       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2195     }
2196   }
2197 
2198   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2199   M = header[1]; N = header[2];
2200 
2201   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2202 
2203   /*
2204      This code adds extra rows to make sure the number of rows is
2205      divisible by the blocksize
2206   */
2207   Mbs        = M/bs;
2208   extra_rows = bs - M + bs*(Mbs);
2209   if (extra_rows == bs) extra_rows = 0;
2210   else                  Mbs++;
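  /* illustrative example: M=10, bs=4 gives Mbs=2 and extra_rows=4-10+8=2,
     so Mbs becomes 3 and the loaded matrix is padded to 12 = 3*4 rows */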
2211   if (extra_rows &&!rank) {
2212     ierr = PetscVerboseInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2213   }
2214 
2215   /* determine ownership of all rows */
2216   mbs        = Mbs/size + ((Mbs % size) > rank);
2217   m          = mbs*bs;
2218   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2219   browners   = rowners + size + 1;
2220   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2221   rowners[0] = 0;
2222   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2223   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2224   rstart = rowners[rank];
2225   rend   = rowners[rank+1];
2226 
2227   /* distribute row lengths to all processors */
2228   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2229   if (!rank) {
2230     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2231     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2232     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2233     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2234     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2235     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2236     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2237   } else {
2238     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2239   }
2240 
2241   if (!rank) {   /* procs[0] */
2242     /* calculate the number of nonzeros on each processor */
2243     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2244     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2245     for (i=0; i<size; i++) {
2246       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2247         procsnz[i] += rowlengths[j];
2248       }
2249     }
2250     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2251 
2252     /* determine max buffer needed and allocate it */
2253     maxnz = 0;
2254     for (i=0; i<size; i++) {
2255       maxnz = PetscMax(maxnz,procsnz[i]);
2256     }
2257     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2258 
2259     /* read in my part of the matrix column indices  */
2260     nz     = procsnz[0];
2261     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2262     mycols = ibuf;
2263     if (size == 1)  nz -= extra_rows;
2264     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2265     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2266 
2267     /* read in the column indices for every other process (except the last) and ship them off */
2268     for (i=1; i<size-1; i++) {
2269       nz   = procsnz[i];
2270       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2271       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2272     }
2273     /* read in the stuff for the last proc */
2274     if (size != 1) {
2275       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2276       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2277       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2278       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2279     }
2280     ierr = PetscFree(cols);CHKERRQ(ierr);
2281   } else {  /* procs[i], i>0 */
2282     /* determine buffer space needed for message */
2283     nz = 0;
2284     for (i=0; i<m; i++) {
2285       nz += locrowlens[i];
2286     }
2287     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2288     mycols = ibuf;
2289     /* receive message of column indices*/
2290     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2291     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2292     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2293   }
2294 
2295   /* loop over local rows, determining number of off diagonal entries */
2296   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2297   odlens   = dlens + (rend-rstart);
2298   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2299   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2300   masked1  = mask    + Mbs;
2301   masked2  = masked1 + Mbs;
2302   rowcount = 0; nzcount = 0;
2303   for (i=0; i<mbs; i++) {
2304     dcount  = 0;
2305     odcount = 0;
2306     for (j=0; j<bs; j++) {
2307       kmax = locrowlens[rowcount];
2308       for (k=0; k<kmax; k++) {
2309         tmp = mycols[nzcount++]/bs; /* block col. index */
2310         if (!mask[tmp]) {
2311           mask[tmp] = 1;
2312           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2313           else masked1[dcount++] = tmp; /* entry in diag portion */
2314         }
2315       }
2316       rowcount++;
2317     }
2318 
2319     dlens[i]  = dcount;  /* d_nzz[i] */
2320     odlens[i] = odcount; /* o_nzz[i] */
2321 
2322     /* zero out the mask elements we set */
2323     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2324     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2325   }
2326 
2327   /* create our matrix */
2328   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2329   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2330   ierr = MatSetType(A,type);CHKERRQ(ierr);
2331   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2332   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2333 
2334   if (!rank) {
2335     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2336     /* read in my part of the matrix numerical values  */
2337     nz = procsnz[0];
2338     vals = buf;
2339     mycols = ibuf;
2340     if (size == 1)  nz -= extra_rows;
2341     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2342     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2343 
2344     /* insert into matrix */
2345     jj      = rstart*bs;
2346     for (i=0; i<m; i++) {
2347       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2348       mycols += locrowlens[i];
2349       vals   += locrowlens[i];
2350       jj++;
2351     }
2352 
2353     /* read in other processors (except the last one) and ship out */
2354     for (i=1; i<size-1; i++) {
2355       nz   = procsnz[i];
2356       vals = buf;
2357       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2358       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2359     }
2360     /* the last proc */
2361     if (size != 1) {
2362       nz   = procsnz[size-1] - extra_rows; /* i equals size-1 after the loop above; the extra rows are not on disk */
2363       vals = buf;
2364       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2365       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2366       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2367     }
2368     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2369 
2370   } else {
2371     /* receive numeric values */
2372     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2373 
2374     /* receive message of values*/
2375     vals   = buf;
2376     mycols = ibuf;
2377     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2378     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2379     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2380 
2381     /* insert into matrix */
2382     jj      = rstart*bs;
2383     for (i=0; i<m; i++) {
2384       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2385       mycols += locrowlens[i];
2386       vals   += locrowlens[i];
2387       jj++;
2388     }
2389   }
2390 
2391   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2392   ierr = PetscFree(buf);CHKERRQ(ierr);
2393   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2394   ierr = PetscFree(rowners);CHKERRQ(ierr);
2395   ierr = PetscFree(dlens);CHKERRQ(ierr);
2396   ierr = PetscFree(mask);CHKERRQ(ierr);
2397   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2398   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2399   *newmat = A;
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2405 /*XXXXX@
2406    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2407 
2408    Input Parameters:
2409 .  mat  - the matrix
2410 .  fact - factor
2411 
2412    Collective on Mat
2413 
2414    Level: advanced
2415 
2416   Notes:
2417    This can also be set by the command line option: -mat_use_hash_table fact
2418 
2419 .keywords: matrix, hashtable, factor, HT
2420 
2421 .seealso: MatSetOption()
2422 @XXXXX*/
2423 
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2427 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2428 {
2429   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2430   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2431   PetscReal      atmp;
2432   PetscReal      *work,*svalues,*rvalues;
2433   PetscErrorCode ierr;
2434   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2435   PetscMPIInt    rank,size;
2436   PetscInt       *rowners_bs,dest,count,source;
2437   PetscScalar    *va;
2438   MatScalar      *ba;
2439   MPI_Status     stat;
2440 
2441   PetscFunctionBegin;
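  /* only the upper triangle is stored: the diagonal block A is handled locally
     above/below, while each entry of the off-diagonal block B contributes both
     to its own row and, through symmetry, to a row owned by a later process;
     those column-wise maxima are accumulated in work[] and sent to their owners */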
2442   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2443   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2444 
2445   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2446   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2447 
2448   bs   = A->bs;
2449   mbs  = a->mbs;
2450   Mbs  = a->Mbs;
2451   ba   = b->a;
2452   bi   = b->i;
2453   bj   = b->j;
2454 
2455   /* find ownerships */
2456   rowners_bs = a->rowners_bs;
2457 
2458   /* each proc creates an array to be distributed */
2459   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2460   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2461 
2462   /* row_max for B */
2463   if (rank != size-1){
2464     for (i=0; i<mbs; i++) {
2465       ncols = bi[1] - bi[0]; bi++;
2466       brow  = bs*i;
2467       for (j=0; j<ncols; j++){
2468         bcol = bs*(*bj);
2469         for (kcol=0; kcol<bs; kcol++){
2470           col = bcol + kcol;                 /* local col index */
2471           col += rowners_bs[rank+1];      /* global col index */
2472           for (krow=0; krow<bs; krow++){
2473             atmp = PetscAbsScalar(*ba); ba++;
2474             row = brow + krow;    /* local row index */
2475             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2476             if (work[col] < atmp) work[col] = atmp;
2477           }
2478         }
2479         bj++;
2480       }
2481     }
2482 
2483     /* send values to its owners */
2484     for (dest=rank+1; dest<size; dest++){
2485       svalues = work + rowners_bs[dest];
2486       count   = rowners_bs[dest+1]-rowners_bs[dest];
2487       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2488     }
2489   }
2490 
2491   /* receive values */
2492   if (rank){
2493     rvalues = work;
2494     count   = rowners_bs[rank+1]-rowners_bs[rank];
2495     for (source=0; source<rank; source++){
2496       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2497       /* process values */
2498       for (i=0; i<count; i++){
2499         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2500       }
2501     }
2502   }
2503 
2504   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2505   ierr = PetscFree(work);CHKERRQ(ierr);
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 #undef __FUNCT__
2510 #define __FUNCT__ "MatRelax_MPISBAIJ"
2511 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2512 {
2513   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2514   PetscErrorCode ierr;
2515   PetscInt       mbs=mat->mbs,bs=matin->bs;
2516   PetscScalar    *x,*b,*ptr,zero=0.0;
2517   Vec            bb1;
2518 
2519   PetscFunctionBegin;
2520   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2521   if (bs > 1)
2522     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2523 
2524   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2525     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2526       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2527       its--;
2528     }
2529 
2530     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2531     while (its--){
2532 
2533       /* lower triangular part: slvec0b = B^T*xx (negated below via VecScale) */
2534       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2535 
2536       /* copy xx into slvec0a */
2537       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2538       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2539       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x is a Vec array, hence PetscScalar */
2540       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2541 
2542       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2543 
2544       /* copy bb into slvec1a */
2545       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2546       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2547       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2548       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2549 
2550       /* set slvec1b = 0 */
2551       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2552 
2553       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2554       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2555       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2556       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2557 
2558       /* upper triangular part: bb1 = bb1 - B*x */
2559       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2560 
2561       /* local diagonal sweep */
2562       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2563     }
2564     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2565   } else {
2566     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2567   }
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 #undef __FUNCT__
2572 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2573 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2574 {
2575   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2576   PetscErrorCode ierr;
2577   Vec            lvec1,bb1;
2578 
2579   PetscFunctionBegin;
2580   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2581   if (matin->bs > 1)
2582     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2583 
2584   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2585     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2586       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2587       its--;
2588     }
2589 
2590     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2591     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2592     while (its--){
2593       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2594 
2595       /* lower triangular part: bb1 = bb - B^T*xx */
2596       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2597       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2598 
2599       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2600       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2601       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2602 
2603       /* upper triangular part: bb1 = bb1 - B*x */
2604       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2605       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2606 
2607       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2608 
2609       /* diagonal sweep */
2610       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2611     }
2612     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2613     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2614   } else {
2615     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2616   }
2617   PetscFunctionReturn(0);
2618 }
2619 
2620