xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 4a93e4dd2dff5bdf5ca7ec0dabef54f43857c032)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does not
26    exist; the ..._MatScalar() names below are simply #defined to the public functions. Otherwise
27    (under PETSC_USE_MAT_SINGLE) ..._MatScalar() takes matrix elements already in single (MatScalar)
28    precision and inserts them into the single precision data structure, while the public
29    MatSetValuesBlocked_MPISBAIJ() converts the PetscScalar entries to single precision and then
30    calls ..._MatScalar(); see MatSetValues_MPISBAIJ() below. */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
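/* A typical use of the store/retrieve pair above (a usage sketch, not taken from this
   file): freeze the nonzero pattern, save the numerical values, edit, then roll back.

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... change entries with MatSetValues() and reassemble ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   <-- restores the saved values of A and B
*/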
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
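/* A worked example of the index arithmetic above (illustration only): with bs = 2
   (so bs2 = 4), inserting scalar entry (row,col) = (3,5) gives brow = 1, bcol = 2,
   ridx = 1, cidx = 1; once the search locates block column 2 at position _i within
   block row 1, the value lands at ap[bs2*_i + bs*cidx + ridx] = ap[4*_i + 3],
   i.e. entry (1,1) of that 2x2 block stored in column-major order. */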
116 #ifndef MatSetValues_SeqSBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (block indices); any a(i,j) with i>j
206    supplied by the user is silently ignored (see the example below).
207 */
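/* For example (a usage sketch, assuming bs = 1 and an INSERT_VALUES assembly;
   mat is a MATMPISBAIJ matrix):

     PetscInt    i = 3, j = 7;
     PetscScalar v = 2.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);   <-- stored, (3,7) is upper triangular
     ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);   <-- ignored, (7,3) lies below the diagonal
*/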
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=baij->rstart_bs;
218   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
219   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
220 
221   /* Some variables required by the MatSetValues_SeqSBAIJ_*_Private() macros above */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc=0;
238   MatScalar     *v_loc=0;
239 
240   PetscFunctionBegin;
241 
242   if(!baij->donotstash){
243     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
244     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
245   }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
256         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
257           col = in[j] - cstart_orig;          /* local col index */
258           brow = row/bs; bcol = col/bs;
259           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
260           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         } else if (in[j] < 0) continue;
264 #if defined(PETSC_USE_DEBUG)
265         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
266 #endif
267         else {  /* off-diag entry (B) */
268           if (mat->was_assembled) {
269             if (!baij->colmap) {
270               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
271             }
272 #if defined (PETSC_USE_CTABLE)
273             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
274             col  = col - 1;
275 #else
276             col = baij->colmap[in[j]/bs] - 1;
277 #endif
278             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
279               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
280               col =  in[j];
281               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
282               B = baij->B;
283               b = (Mat_SeqBAIJ*)(B)->data;
284               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
285               ba=b->a;
286             } else col += in[j]%bs;
287           } else col = in[j];
288           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
289           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
290           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
291         }
292       }
293     } else {  /* off processor entry */
294       if (!baij->donotstash) {
295         n_loc = 0;
296         for (j=0; j<n; j++){
297           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
298           in_loc[n_loc] = in[j];
299           if (roworiented) {
300             v_loc[n_loc] = v[i*n+j];
301           } else {
302             v_loc[n_loc] = v[j*m+i];
303           }
304           n_loc++;
305         }
306         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
307       }
308     }
309   }
310 
311   if(!baij->donotstash){
312     ierr = PetscFree(in_loc);CHKERRQ(ierr);
313     ierr = PetscFree(v_loc);CHKERRQ(ierr);
314   }
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
321 {
322   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
323   const MatScalar *value;
324   MatScalar       *barray=baij->barray;
325   PetscTruth      roworiented = baij->roworiented;
326   PetscErrorCode  ierr;
327   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
328   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
329   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
330 
331   PetscFunctionBegin;
332   if(!barray) {
333     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
334     baij->barray = barray;
335   }
336 
337   if (roworiented) {
338     stepval = (n-1)*bs;
339   } else {
340     stepval = (m-1)*bs;
341   }
342   for (i=0; i<m; i++) {
343     if (im[i] < 0) continue;
344 #if defined(PETSC_USE_DEBUG)
345     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
346 #endif
347     if (im[i] >= rstart && im[i] < rend) {
348       row = im[i] - rstart;
349       for (j=0; j<n; j++) {
350         /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
351         if ((roworiented) && (n == 1)) {
352           barray = (MatScalar*) v + i*bs2;
353         } else if((!roworiented) && (m == 1)) {
354           barray = (MatScalar*) v + j*bs2;
355         } else { /* Here a copy is required */
356           if (roworiented) {
357             value = v + i*(stepval+bs)*bs + j*bs;
358           } else {
359             value = v + j*(stepval+bs)*bs + i*bs;
360           }
361           for (ii=0; ii<bs; ii++,value+=stepval) {
362             for (jj=0; jj<bs; jj++) {
363               *barray++  = *value++;
364             }
365           }
366           barray -=bs2;
367         }
368 
369         if (in[j] >= cstart && in[j] < cend){
370           col  = in[j] - cstart;
371           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
372         }
373         else if (in[j] < 0) continue;
374 #if defined(PETSC_USE_DEBUG)
375         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
376 #endif
377         else {
378           if (mat->was_assembled) {
379             if (!baij->colmap) {
380               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381             }
382 
383 #if defined(PETSC_USE_DEBUG)
384 #if defined (PETSC_USE_CTABLE)
385             { PetscInt data;
386               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
387               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
388             }
389 #else
390             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
391 #endif
392 #endif
393 #if defined (PETSC_USE_CTABLE)
394 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
395             col  = (col - 1)/bs;
396 #else
397             col = (baij->colmap[in[j]] - 1)/bs;
398 #endif
399             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
400               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
401               col =  in[j];
402             }
403           }
404           else col = in[j];
405           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
406         }
407       }
408     } else {
409       if (!baij->donotstash) {
410         if (roworiented) {
411           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         } else {
413           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
414         }
415       }
416     }
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatGetValues_MPISBAIJ"
423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
424 {
425   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
426   PetscErrorCode ierr;
427   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
428   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
429 
430   PetscFunctionBegin;
431   for (i=0; i<m; i++) {
432     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
433     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
434     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
435       row = idxm[i] - bsrstart;
436       for (j=0; j<n; j++) {
437         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
438         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
439         if (idxn[j] >= bscstart && idxn[j] < bscend){
440           col = idxn[j] - bscstart;
441           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442         } else {
443           if (!baij->colmap) {
444             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
445           }
446 #if defined (PETSC_USE_CTABLE)
447           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
448           data --;
449 #else
450           data = baij->colmap[idxn[j]/bs]-1;
451 #endif
452           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
453           else {
454             col  = data + idxn[j]%bs;
455             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
456           }
457         }
458       }
459     } else {
460       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
461     }
462   }
463   PetscFunctionReturn(0);
464 }
465 
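/* For NORM_FROBENIUS below each process contributes
     lnorm2[0] = ||A_p||_F^2   (its symmetric diagonal block, as computed by MatNorm() on A)
     lnorm2[1] = ||B_p||_F^2   (its stored upper off-diagonal blocks)
   and the global result is sqrt(sum_p lnorm2[0] + 2*sum_p lnorm2[1]); the factor 2 accounts
   for the unstored lower-triangular mirror images of the entries of B (a summary inferred
   from the code below). */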
466 #undef __FUNCT__
467 #define __FUNCT__ "MatNorm_MPISBAIJ"
468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
469 {
470   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
471   PetscErrorCode ierr;
472   PetscReal      sum[2],*lnorm2;
473 
474   PetscFunctionBegin;
475   if (baij->size == 1) {
476     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
477   } else {
478     if (type == NORM_FROBENIUS) {
479       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
480       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
481       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
482       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
483       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
484       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
485       *norm = sqrt(sum[0] + 2*sum[1]);
486       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
487     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
488       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
489       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
490       PetscReal    *rsum,*rsum2,vabs;
491       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
492       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
493       MatScalar    *v;
494 
495       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
496       rsum2 = rsum + mat->N;
497       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
498       /* Amat */
499       v = amat->a; jj = amat->j;
500       for (brow=0; brow<mbs; brow++) {
501         grow = bs*(rstart + brow);
502         nz = amat->i[brow+1] - amat->i[brow];
503         for (bcol=0; bcol<nz; bcol++){
504           gcol = bs*(rstart + *jj); jj++;
505           for (col=0; col<bs; col++){
506             for (row=0; row<bs; row++){
507               vabs = PetscAbsScalar(*v); v++;
508               rsum[gcol+col] += vabs;
509               /* non-diagonal block */
510               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
511             }
512           }
513         }
514       }
515       /* Bmat */
516       v = bmat->a; jj = bmat->j;
517       for (brow=0; brow<mbs; brow++) {
518         grow = bs*(rstart + brow);
519         nz = bmat->i[brow+1] - bmat->i[brow];
520         for (bcol=0; bcol<nz; bcol++){
521           gcol = bs*garray[*jj]; jj++;
522           for (col=0; col<bs; col++){
523             for (row=0; row<bs; row++){
524               vabs = PetscAbsScalar(*v); v++;
525               rsum[gcol+col] += vabs;
526               rsum[grow+row] += vabs;
527             }
528           }
529         }
530       }
531       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
532       *norm = 0.0;
533       for (col=0; col<mat->N; col++) {
534         if (rsum2[col] > *norm) *norm = rsum2[col];
535       }
536       ierr = PetscFree(rsum);CHKERRQ(ierr);
537     } else {
538       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
539     }
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
547 {
548   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
549   PetscErrorCode ierr;
550   PetscInt       nstash,reallocs;
551   InsertMode     addv;
552 
553   PetscFunctionBegin;
554   if (baij->donotstash) {
555     PetscFunctionReturn(0);
556   }
557 
558   /* make sure all processors are in INSERT_VALUES or ADD_VALUES mode (their bitwise OR detects a mix) */
559   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
560   if (addv == (ADD_VALUES|INSERT_VALUES)) {
561     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
562   }
563   mat->insertmode = addv; /* in case this processor had no cache */
564 
565   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
566   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
567   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
568   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
569   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
570   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
577 {
578   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
579   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
580   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
581   PetscErrorCode ierr;
582   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
583   PetscInt       *row,*col,other_disassembled;
584   PetscMPIInt    n;
585   PetscTruth     r1,r2,r3;
586   MatScalar      *val;
587   InsertMode     addv = mat->insertmode;
588 
589   PetscFunctionBegin;
590 
591   if (!baij->donotstash) {
592     while (1) {
593       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
594       if (!flg) break;
595 
596       for (i=0; i<n;) {
597         /* Now identify the consecutive vals belonging to the same row */
598         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
599         if (j < n) ncols = j-i;
600         else       ncols = n-i;
601         /* Now assemble all these values with a single function call */
602         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
603         i = j;
604       }
605     }
606     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
607     /* Now process the block-stash. Since the values are stashed column-oriented,
608        set the roworiented flags to PETSC_FALSE, and after MatSetValues()
609        restore the original flags */
610     r1 = baij->roworiented;
611     r2 = a->roworiented;
612     r3 = b->roworiented;
613     baij->roworiented = PETSC_FALSE;
614     a->roworiented    = PETSC_FALSE;
615     b->roworiented    = PETSC_FALSE;
616     while (1) {
617       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
618       if (!flg) break;
619 
620       for (i=0; i<n;) {
621         /* Now identify the consecutive vals belonging to the same row */
622         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
623         if (j < n) ncols = j-i;
624         else       ncols = n-i;
625         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
626         i = j;
627       }
628     }
629     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
630     baij->roworiented = r1;
631     a->roworiented    = r2;
632     b->roworiented    = r3;
633   }
634 
635   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
636   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
637 
638   /* determine if any processor has disassembled; if so we must
639      also disassemble ourselves, in order that we may reassemble. */
640   /*
641      if the nonzero structure of submatrix B cannot change then we know that
642      no processor disassembled, thus we can skip this step
643   */
644   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
645     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
646     if (mat->was_assembled && !other_disassembled) {
647       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
648     }
649   }
650 
651   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
652     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
653   }
654   b->compressedrow.use = PETSC_TRUE;
655   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
656   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
657 
658   if (baij->rowvalues) {
659     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
660     baij->rowvalues = 0;
661   }
662 
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
669 {
670   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode    ierr;
672   PetscInt          bs = mat->bs;
673   PetscMPIInt       size = baij->size,rank = baij->rank;
674   PetscTruth        iascii,isdraw;
675   PetscViewer       sviewer;
676   PetscViewerFormat format;
677 
678   PetscFunctionBegin;
679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
681   if (iascii) {
682     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
683     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684       MatInfo info;
685       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
686       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
687       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
688               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
689               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
690       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
691       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
692       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
694       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
695       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
696       PetscFunctionReturn(0);
697     } else if (format == PETSC_VIEWER_ASCII_INFO) {
698       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     }
701   }
702 
703   if (isdraw) {
704     PetscDraw       draw;
705     PetscTruth isnull;
706     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
707     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
708   }
709 
710   if (size == 1) {
711     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
712     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
713   } else {
714     /* assemble the entire matrix onto first processor. */
715     Mat          A;
716     Mat_SeqSBAIJ *Aloc;
717     Mat_SeqBAIJ  *Bloc;
718     PetscInt     M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
719     MatScalar    *a;
720 
721     /* Should this be the same type as mat? */
722     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
723     if (!rank) {
724       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
725     } else {
726       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
727     }
728     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
729     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
730     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
731 
732     /* copy over the A part */
733     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
734     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
735     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
736 
737     for (i=0; i<mbs; i++) {
738       rvals[0] = bs*(baij->rstart + i);
739       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
740       for (j=ai[i]; j<ai[i+1]; j++) {
741         col = (baij->cstart+aj[j])*bs;
742         for (k=0; k<bs; k++) {
743           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
744           col++; a += bs;
745         }
746       }
747     }
748     /* copy over the B part */
749     Bloc = (Mat_SeqBAIJ*)baij->B->data;
750     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
751     for (i=0; i<mbs; i++) {
752       rvals[0] = bs*(baij->rstart + i);
753       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
754       for (j=ai[i]; j<ai[i+1]; j++) {
755         col = baij->garray[aj[j]]*bs;
756         for (k=0; k<bs; k++) {
757           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
758           col++; a += bs;
759         }
760       }
761     }
762     ierr = PetscFree(rvals);CHKERRQ(ierr);
763     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
764     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
765     /*
766        Everyone has to call MatView() to draw the matrix since the graphics waits are
767        synchronized across all processors that share the PetscDraw object
768     */
769     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
770     if (!rank) {
771       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
772       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
773     }
774     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
775     ierr = MatDestroy(A);CHKERRQ(ierr);
776   }
777   PetscFunctionReturn(0);
778 }
779 
780 #undef __FUNCT__
781 #define __FUNCT__ "MatView_MPISBAIJ"
782 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
783 {
784   PetscErrorCode ierr;
785   PetscTruth     iascii,isdraw,issocket,isbinary;
786 
787   PetscFunctionBegin;
788   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
789   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
790   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
791   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
792   if (iascii || isdraw || issocket || isbinary) {
793     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
794   } else {
795     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
796   }
797   PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "MatDestroy_MPISBAIJ"
802 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
803 {
804   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
805   PetscErrorCode ierr;
806 
807   PetscFunctionBegin;
808 #if defined(PETSC_USE_LOG)
809   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
810 #endif
811   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
812   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
813   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
814   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
815   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
816 #if defined (PETSC_USE_CTABLE)
817   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
818 #else
819   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
820 #endif
821   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
822   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
823   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
824   if (baij->slvec0) {
825     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
826     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
827   }
828   if (baij->slvec1) {
829     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
830     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
831     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
832   }
833   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
834   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
835   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
836   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
837 #if defined(PETSC_USE_MAT_SINGLE)
838   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
839 #endif
840   ierr = PetscFree(baij);CHKERRQ(ierr);
841 
842   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
843   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
844   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
845   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848 
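/* Data flow of the single-scatter MatMult below (a summary inferred from the code):
   slvec1a = A*x and slvec0b = B^T*x are formed locally, x is copied into the leading
   part of slvec0, and a single ADD_VALUES scatter of slvec0 into slvec1 simultaneously
   (i) delivers the ghost values of x into slvec1b and (ii) accumulates the remote
   B^T*x contributions into slvec1a; the final y = B*slvec1b + slvec1a then adds the
   superdiagonal part, completing the symmetric product with one communication round. */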
849 #undef __FUNCT__
850 #define __FUNCT__ "MatMult_MPISBAIJ"
851 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
852 {
853   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
854   PetscErrorCode ierr;
855   PetscInt       nt,mbs=a->mbs,bs=A->bs;
856   PetscScalar    *x,*from,zero=0.0;
857 
858   PetscFunctionBegin;
859   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
860   if (nt != A->n) {
861     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
862   }
863   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
864   if (nt != A->m) {
865     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
866   }
867 
868   /* diagonal part */
869   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
870   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
871 
872   /* subdiagonal part */
873   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
874 
875   /* copy x into the vec slvec0 */
876   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
877   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
878   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
879   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
880 
881   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
882   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
883   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
884 
885   /* superdiagonal part */
886   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
887 
888   PetscFunctionReturn(0);
889 }
890 
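/* The _2comm variant below uses the conventional BAIJ communication pattern instead:
   a forward scatter of x into lvec for the superdiagonal product plus a reverse
   ADD_VALUES scatter of B^T*x back into y for the subdiagonal part -- two
   communication rounds where the slvec scheme above needs only one. */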
891 #undef __FUNCT__
892 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
893 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
894 {
895   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
896   PetscErrorCode ierr;
897   PetscInt       nt;
898 
899   PetscFunctionBegin;
900   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
901   if (nt != A->n) {
902     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
903   }
904   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
905   if (nt != A->m) {
906     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
907   }
908 
909   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
910   /* do diagonal part */
911   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
912   /* do superdiagonal part */
913   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
914   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
915   /* do subdiagonal part */
916   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
917   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
918   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
919 
920   PetscFunctionReturn(0);
921 }
922 
923 #undef __FUNCT__
924 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
925 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
926 {
927   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
928   PetscErrorCode ierr;
929   PetscInt       mbs=a->mbs,bs=A->bs;
930   PetscScalar    *x,*from,zero=0.0;
931 
932   PetscFunctionBegin;
933   /*
934   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
935   PetscSynchronizedFlush(A->comm);
936   */
937   /* diagonal part */
938   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
939   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
940 
941   /* subdiagonal part */
942   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
943 
944   /* copy x into the vec slvec0 */
945   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
946   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
947   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
948   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
949 
950   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
951   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
952   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
953 
954   /* superdiagonal part */
955   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
956 
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
962 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
963 {
964   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
969   /* do diagonal part */
970   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
971   /* do superdiagonal part */
972   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
973   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
974 
975   /* do subdiagonal part */
976   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
977   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
978   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
979 
980   PetscFunctionReturn(0);
981 }
982 
983 /*
984   This only works correctly for square matrices where the subblock A->A is the
985    diagonal block
986 */
987 #undef __FUNCT__
988 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
989 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
990 {
991   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
992   PetscErrorCode ierr;
993 
994   PetscFunctionBegin;
995   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
996   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatScale_MPISBAIJ"
1002 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1003 {
1004   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1009   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1015 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1016 {
1017   PetscFunctionBegin;
1018   if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1024 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1025 {
1026   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1027 
1028   PetscFunctionBegin;
1029   if (!baij->getrowactive) {
1030     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1031   }
1032   baij->getrowactive = PETSC_FALSE;
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 #undef __FUNCT__
1037 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1038 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1039 {
1040   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1041   PetscErrorCode ierr;
1042 
1043   PetscFunctionBegin;
1044   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1045   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1051 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1052 {
1053   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1054   Mat            A = a->A,B = a->B;
1055   PetscErrorCode ierr;
1056   PetscReal      isend[5],irecv[5];
1057 
1058   PetscFunctionBegin;
1059   info->block_size     = (PetscReal)matin->bs;
1060   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1061   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1062   isend[3] = info->memory;  isend[4] = info->mallocs;
1063   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1064   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1065   isend[3] += info->memory;  isend[4] += info->mallocs;
1066   if (flag == MAT_LOCAL) {
1067     info->nz_used      = isend[0];
1068     info->nz_allocated = isend[1];
1069     info->nz_unneeded  = isend[2];
1070     info->memory       = isend[3];
1071     info->mallocs      = isend[4];
1072   } else if (flag == MAT_GLOBAL_MAX) {
1073     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1074     info->nz_used      = irecv[0];
1075     info->nz_allocated = irecv[1];
1076     info->nz_unneeded  = irecv[2];
1077     info->memory       = irecv[3];
1078     info->mallocs      = irecv[4];
1079   } else if (flag == MAT_GLOBAL_SUM) {
1080     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1081     info->nz_used      = irecv[0];
1082     info->nz_allocated = irecv[1];
1083     info->nz_unneeded  = irecv[2];
1084     info->memory       = irecv[3];
1085     info->mallocs      = irecv[4];
1086   } else {
1087     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1088   }
1089   info->rows_global       = (PetscReal)matin->M;
1090   info->columns_global    = (PetscReal)matin->N;
1091   info->rows_local        = (PetscReal)matin->m;
1092   info->columns_local     = (PetscReal)matin->n;
1093   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1094   info->fill_ratio_needed = 0;
1095   info->factor_mallocs    = 0;
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1101 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1102 {
1103   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   switch (op) {
1108   case MAT_NO_NEW_NONZERO_LOCATIONS:
1109   case MAT_YES_NEW_NONZERO_LOCATIONS:
1110   case MAT_COLUMNS_UNSORTED:
1111   case MAT_COLUMNS_SORTED:
1112   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1113   case MAT_KEEP_ZEROED_ROWS:
1114   case MAT_NEW_NONZERO_LOCATION_ERR:
1115     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1116     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1117     break;
1118   case MAT_ROW_ORIENTED:
1119     a->roworiented = PETSC_TRUE;
1120     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1121     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1122     break;
1123   case MAT_ROWS_SORTED:
1124   case MAT_ROWS_UNSORTED:
1125   case MAT_YES_NEW_DIAGONALS:
1126     ierr = PetscLogInfo((A,"Info:MatSetOption_MPISBAIJ:Option ignored\n"));CHKERRQ(ierr);
1127     break;
1128   case MAT_COLUMN_ORIENTED:
1129     a->roworiented = PETSC_FALSE;
1130     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1131     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1132     break;
1133   case MAT_IGNORE_OFF_PROC_ENTRIES:
1134     a->donotstash = PETSC_TRUE;
1135     break;
1136   case MAT_NO_NEW_DIAGONALS:
1137     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1138   case MAT_USE_HASH_TABLE:
1139     a->ht_flag = PETSC_TRUE;
1140     break;
1141   case MAT_NOT_SYMMETRIC:
1142   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1143   case MAT_HERMITIAN:
1144     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1145   case MAT_SYMMETRIC:
1146   case MAT_STRUCTURALLY_SYMMETRIC:
1147   case MAT_NOT_HERMITIAN:
1148   case MAT_SYMMETRY_ETERNAL:
1149   case MAT_NOT_SYMMETRY_ETERNAL:
1150     break;
1151   default:
1152     SETERRQ(PETSC_ERR_SUP,"unknown option");
1153   }
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1159 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1160 {
1161   PetscErrorCode ierr;
1162   PetscFunctionBegin;
1163   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
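/* For the symmetric format only a symmetric scaling M <- D*M*D is meaningful, so
   ll and rr must be the same vector; the diagonal block is scaled on both sides
   locally, while the off-diagonal block B gets ll from this process and the ghost
   entries of rr through the usual scatter (a summary inferred from the code below). */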
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1169 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1170 {
1171   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1172   Mat            a=baij->A, b=baij->B;
1173   PetscErrorCode ierr;
1174   PetscInt       nv,m,n;
1175   PetscTruth     flg;
1176 
1177   PetscFunctionBegin;
1178   if (ll != rr){
1179     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1180     if (!flg)
1181       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1182   }
1183   if (!ll) PetscFunctionReturn(0);
1184 
1185   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1186   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1187 
1188   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1189   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
1190 
1191   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1192 
1193   /* left diagonalscale the off-diagonal part */
1194   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1195 
1196   /* scale the diagonal part */
1197   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1198 
1199   /* right diagonalscale the off-diagonal part */
1200   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1201   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1207 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1208 {
1209   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1210   MPI_Comm          comm = A->comm;
1211   static PetscTruth called = PETSC_FALSE;
1212   PetscErrorCode    ierr;
1213 
1214   PetscFunctionBegin;
1215   if (!a->rank) {
1216     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1217   }
1218   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1219   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1220   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1226 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1227 {
1228   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1229   PetscErrorCode ierr;
1230 
1231   PetscFunctionBegin;
1232   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatEqual_MPISBAIJ"
1240 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1241 {
1242   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1243   Mat            a,b,c,d;
1244   PetscTruth     flg;
1245   PetscErrorCode ierr;
1246 
1247   PetscFunctionBegin;
1248   a = matA->A; b = matA->B;
1249   c = matB->A; d = matB->B;
1250 
1251   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1252   if (flg) {
1253     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1254   }
1255   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1261 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1262 {
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1272 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1273 {
1274   PetscErrorCode ierr;
1275   PetscInt       i;
1276   PetscTruth     flg;
1277 
1278   PetscFunctionBegin;
1279   for (i=0; i<n; i++) {
1280     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1281     if (!flg) {
1282       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1283     }
1284   }
1285   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
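/* e.g. extracting the principal submatrix on rows/columns 0..9 (a usage sketch;
   irow and icol must be identical for the symmetric format):

     IS  is;
     Mat *submat;
     ierr = ISCreateStride(PETSC_COMM_SELF,10,0,1,&is);CHKERRQ(ierr);
     ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
*/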
1289 
1290 /* -------------------------------------------------------------------*/
1291 static struct _MatOps MatOps_Values = {
1292        MatSetValues_MPISBAIJ,
1293        MatGetRow_MPISBAIJ,
1294        MatRestoreRow_MPISBAIJ,
1295        MatMult_MPISBAIJ,
1296 /* 4*/ MatMultAdd_MPISBAIJ,
1297        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones */
1298        MatMultAdd_MPISBAIJ,
1299        0,
1300        0,
1301        0,
1302 /*10*/ 0,
1303        0,
1304        0,
1305        MatRelax_MPISBAIJ,
1306        MatTranspose_MPISBAIJ,
1307 /*15*/ MatGetInfo_MPISBAIJ,
1308        MatEqual_MPISBAIJ,
1309        MatGetDiagonal_MPISBAIJ,
1310        MatDiagonalScale_MPISBAIJ,
1311        MatNorm_MPISBAIJ,
1312 /*20*/ MatAssemblyBegin_MPISBAIJ,
1313        MatAssemblyEnd_MPISBAIJ,
1314        0,
1315        MatSetOption_MPISBAIJ,
1316        MatZeroEntries_MPISBAIJ,
1317 /*25*/ 0,
1318        0,
1319        0,
1320        0,
1321        0,
1322 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1323        0,
1324        0,
1325        0,
1326        0,
1327 /*35*/ MatDuplicate_MPISBAIJ,
1328        0,
1329        0,
1330        0,
1331        0,
1332 /*40*/ 0,
1333        MatGetSubMatrices_MPISBAIJ,
1334        MatIncreaseOverlap_MPISBAIJ,
1335        MatGetValues_MPISBAIJ,
1336        0,
1337 /*45*/ MatPrintHelp_MPISBAIJ,
1338        MatScale_MPISBAIJ,
1339        0,
1340        0,
1341        0,
1342 /*50*/ 0,
1343        0,
1344        0,
1345        0,
1346        0,
1347 /*55*/ 0,
1348        0,
1349        MatSetUnfactored_MPISBAIJ,
1350        0,
1351        MatSetValuesBlocked_MPISBAIJ,
1352 /*60*/ 0,
1353        0,
1354        0,
1355        MatGetPetscMaps_Petsc,
1356        0,
1357 /*65*/ 0,
1358        0,
1359        0,
1360        0,
1361        0,
1362 /*70*/ MatGetRowMax_MPISBAIJ,
1363        0,
1364        0,
1365        0,
1366        0,
1367 /*75*/ 0,
1368        0,
1369        0,
1370        0,
1371        0,
1372 /*80*/ 0,
1373        0,
1374        0,
1375        0,
1376        MatLoad_MPISBAIJ,
1377 /*85*/ 0,
1378        0,
1379        0,
1380        0,
1381        0,
1382 /*90*/ 0,
1383        0,
1384        0,
1385        0,
1386        0,
1387 /*95*/ 0,
1388        0,
1389        0,
1390        0};
1391 
1392 
1393 EXTERN_C_BEGIN
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1396 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1397 {
1398   PetscFunctionBegin;
1399   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1400   *iscopy = PETSC_FALSE;
1401   PetscFunctionReturn(0);
1402 }
1403 EXTERN_C_END
1404 
1405 EXTERN_C_BEGIN
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1408 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1409 {
1410   Mat_MPISBAIJ   *b;
1411   PetscErrorCode ierr;
1412   PetscInt       i,mbs,Mbs;
1413 
1414   PetscFunctionBegin;
1415   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1416 
1417   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1418   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1419   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1420   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1421   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1422   if (d_nnz) {
1423     for (i=0; i<B->m/bs; i++) {
1424       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1425     }
1426   }
1427   if (o_nnz) {
1428     for (i=0; i<B->m/bs; i++) {
1429       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1430     }
1431   }
1432   B->preallocated = PETSC_TRUE;
1433   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1434   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1435   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1436   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1437 
1438   b   = (Mat_MPISBAIJ*)B->data;
1439   mbs = B->m/bs;
1440   Mbs = B->M/bs;
1441   if (mbs*bs != B->m) {
1442     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->m,bs);
1443   }
1444 
1445   B->bs  = bs;
1446   b->bs2 = bs*bs;
1447   b->mbs = mbs;
1448   b->nbs = mbs;
1449   b->Mbs = Mbs;
1450   b->Nbs = Mbs;
1451 
1452   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1453   b->rowners[0]    = 0;
1454   for (i=2; i<=b->size; i++) {
1455     b->rowners[i] += b->rowners[i-1];
1456   }
1457   b->rstart    = b->rowners[b->rank];
1458   b->rend      = b->rowners[b->rank+1];
1459   b->cstart    = b->rstart;
1460   b->cend      = b->rend;
1461   for (i=0; i<=b->size; i++) {
1462     b->rowners_bs[i] = b->rowners[i]*bs;
1463   }
1464   b->rstart_bs = b->rstart*bs;
1465   b->rend_bs   = b->rend*bs;
1466 
1467   b->cstart_bs = b->cstart*bs;
1468   b->cend_bs   = b->cend*bs;
1469 
1470   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1471   ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr);
1472   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1473   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1474   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1475 
1476   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1477   ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr);
1478   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1479   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1480   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1481 
1482   /* build cache for off-processor entries generated during assembly */
1483   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1484 
1485   PetscFunctionReturn(0);
1486 }
1487 EXTERN_C_END
1488 
1489 /*MC
1490    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1491    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1492 
1493    Options Database Keys:
1494 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1495 
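        Example usage (a minimal sketch, not taken from the PETSc examples; the
        communicator, global sizes, block size, and preallocation values are
        illustrative assumptions):
     .vb
        Mat A;
        ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
        ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
        ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
        ierr = MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     .ve
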
1496   Level: beginner
1497 
1498 .seealso: MatCreateMPISBAIJ
1499 M*/
1500 
1501 EXTERN_C_BEGIN
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatCreate_MPISBAIJ"
1504 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1505 {
1506   Mat_MPISBAIJ   *b;
1507   PetscErrorCode ierr;
1508   PetscTruth     flg;
1509 
1510   PetscFunctionBegin;
1511 
1512   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1513   B->data = (void*)b;
1514   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1515 
1516   B->ops->destroy    = MatDestroy_MPISBAIJ;
1517   B->ops->view       = MatView_MPISBAIJ;
1518   B->mapping    = 0;
1519   B->factor     = 0;
1520   B->assembled  = PETSC_FALSE;
1521 
1522   B->insertmode = NOT_SET_VALUES;
1523   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1524   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1525 
1526   /* build local table of row and column ownerships */
1527   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1528   b->cowners    = b->rowners + b->size + 2;
1529   b->rowners_bs = b->cowners + b->size + 2;
1530   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1531 
1532   /* build cache for off-processor entries generated during assembly */
1533   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1534   b->donotstash  = PETSC_FALSE;
1535   b->colmap      = PETSC_NULL;
1536   b->garray      = PETSC_NULL;
1537   b->roworiented = PETSC_TRUE;
1538 
1539 #if defined(PETSC_USE_MAT_SINGLE)
1540   /* stuff for MatSetValues_XXX in single precision */
1541   b->setvalueslen     = 0;
1542   b->setvaluescopy    = PETSC_NULL;
1543 #endif
1544 
1545   /* stuff used in block assembly */
1546   b->barray       = 0;
1547 
1548   /* stuff used for matrix vector multiply */
1549   b->lvec         = 0;
1550   b->Mvctx        = 0;
1551   b->slvec0       = 0;
1552   b->slvec0b      = 0;
1553   b->slvec1       = 0;
1554   b->slvec1a      = 0;
1555   b->slvec1b      = 0;
1556   b->sMvctx       = 0;
1557 
1558   /* stuff for MatGetRow() */
1559   b->rowindices   = 0;
1560   b->rowvalues    = 0;
1561   b->getrowactive = PETSC_FALSE;
1562 
1563   /* hash table stuff */
1564   b->ht           = 0;
1565   b->hd           = 0;
1566   b->ht_size      = 0;
1567   b->ht_flag      = PETSC_FALSE;
1568   b->ht_fact      = 0;
1569   b->ht_total_ct  = 0;
1570   b->ht_insert_ct = 0;
1571 
1572   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1573   if (flg) {
1574     PetscReal fact = 1.39;
1575     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1576     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1577     if (fact <= 1.0) fact = 1.39;
1578     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1579     ierr = PetscLogInfo((0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact));CHKERRQ(ierr);
1580   }
1581   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1582                                      "MatStoreValues_MPISBAIJ",
1583                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1584   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1585                                      "MatRetrieveValues_MPISBAIJ",
1586                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1587   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1588                                      "MatGetDiagonalBlock_MPISBAIJ",
1589                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1590   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1591                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1592                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1593   B->symmetric                  = PETSC_TRUE;
1594   B->structurally_symmetric     = PETSC_TRUE;
1595   B->symmetric_set              = PETSC_TRUE;
1596   B->structurally_symmetric_set = PETSC_TRUE;
1597   PetscFunctionReturn(0);
1598 }
1599 EXTERN_C_END
1600 
1601 /*MC
1602    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1603 
1604    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1605    and MATMPISBAIJ otherwise.
1606 
1607    Options Database Keys:
1608 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1609 
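        Example usage (a minimal sketch; comm, m, n, M, and N are placeholders and
        error handling is abbreviated) - the type resolves to MATSEQSBAIJ or
        MATMPISBAIJ depending on the size of comm:
     .vb
        Mat A;
        ierr = MatCreate(comm,&A);CHKERRQ(ierr);
        ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
        ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
     .ve
        Calling MatSetFromOptions() instead of MatSetType() picks up -mat_type sbaij.
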
1610   Level: beginner
1611 
1612 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1613 M*/
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatCreate_SBAIJ"
1618 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1619 {
1620   PetscErrorCode ierr;
1621   PetscMPIInt    size;
1622 
1623   PetscFunctionBegin;
1624   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1625   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1626   if (size == 1) {
1627     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1628   } else {
1629     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 EXTERN_C_END
1634 
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1637 /*@C
1638    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1639    the user should preallocate the matrix storage by setting the parameters
1640    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1641    performance can be increased by more than a factor of 50.
1642 
1643    Collective on Mat
1644 
1645    Input Parameters:
1646 +  A - the matrix
1647 .  bs   - size of block
1648 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1649            submatrix  (same for all local rows)
1650 .  d_nnz - array containing the number of block nonzeros in the various block rows
1651            in the upper triangular and diagonal part of the diagonal portion of the local
1652            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1653            for the diagonal entry even if it is zero.
1654 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1655            submatrix (same for all local rows).
1656 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1657            off-diagonal portion of the local submatrix (possibly different for
1658            each block row) or PETSC_NULL.
1659 
1660 
1661    Options Database Keys:
1662 +   -mat_no_unroll - uses code that does not unroll the loops in the
1663                      block calculations (much slower)
1664 -   -mat_block_size - size of the blocks to use
1665 
1666    Notes:
1667 
1668    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1669    then it must be used on all processors that share the object for that argument.
1670 
1671    If the *_nnz parameter is given then the *_nz parameter is ignored
1672 
1673    Storage Information:
1674    For a square global matrix we define each processor's diagonal portion
1675    to be its local rows and the corresponding columns (a square submatrix);
1676    each processor's off-diagonal portion encompasses the remainder of the
1677    local matrix (a rectangular submatrix).
1678 
1679    The user can specify preallocated storage for the diagonal part of
1680    the local submatrix with either d_nz or d_nnz (not both).  Set
1681    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1682    memory allocation.  Likewise, specify preallocated storage for the
1683    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1684 
1685    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1686    the figure below we depict these three local rows and all columns (0-11).
1687 
1688 .vb
1689            0 1 2 3 4 5 6 7 8 9 10 11
1690           --------------------------
1691    row 3  |  o o o d d d o o o o o o
1692    row 4  |  o o o d d d o o o o o o
1693    row 5  |  o o o d d d o o o o o o
1694           --------------------------
1695 .ve
1696 
1697    Thus, any entries in the d locations are stored in the d (diagonal)
1698    submatrix, and any entries in the o locations are stored in the
1699    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1700    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1701 
1702    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1703    plus the diagonal part of the d matrix,
1704    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1705    In general, for PDE problems in which most nonzeros are near the diagonal,
1706    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1707    or you will get TERRIBLE performance; see the users' manual chapter on
1708    matrices.
1709 
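        As a worked example for the figure above (an illustration, assuming every
        depicted entry is nonzero): with bs=3 the local rows 3-5 form one block row;
        only the upper triangular blocks are stored, giving one block in the d part
        (columns 3-5) and two stored blocks in the o part (columns 6-8 and 9-11), so
        the preallocation call would be
     .vb
        PetscInt d_nnz[1] = {1},o_nnz[1] = {2};
        ierr = MatMPISBAIJSetPreallocation(A,3,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     .ve
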
1710    Level: intermediate
1711 
1712 .keywords: matrix, block, aij, compressed row, sparse, parallel
1713 
1714 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1715 @*/
1716 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1717 {
1718   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1719 
1720   PetscFunctionBegin;
1721   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1722   if (f) {
1723     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1724   }
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatCreateMPISBAIJ"
1730 /*@C
1731    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1732    (block compressed row).  For good matrix assembly performance
1733    the user should preallocate the matrix storage by setting the parameters
1734    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1735    performance can be increased by more than a factor of 50.
1736 
1737    Collective on MPI_Comm
1738 
1739    Input Parameters:
1740 +  comm - MPI communicator
1741 .  bs   - size of block
1742 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1743            This value should be the same as the local size used in creating the
1744            y vector for the matrix-vector product y = Ax.
1745 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1746            This value should be the same as the local size used in creating the
1747            x vector for the matrix-vector product y = Ax.
1748 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1749 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1750 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1751            submatrix  (same for all local rows)
1752 .  d_nnz - array containing the number of block nonzeros in the various block rows
1753            in the upper triangular and diagonal part of the diagonal portion of the local
1754            submatrix (possibly different for each block row) or PETSC_NULL.
1755            You must leave room for the diagonal entry even if it is zero.
1756 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1757            submatrix (same for all local rows).
1758 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1759            off-diagonal portion of the local submatrix (possibly different for
1760            each block row) or PETSC_NULL.
1761 
1762    Output Parameter:
1763 .  A - the matrix
1764 
1765    Options Database Keys:
1766 +   -mat_no_unroll - uses code that does not unroll the loops in the
1767                      block calculations (much slower)
1768 .   -mat_block_size - size of the blocks to use
1769 -   -mat_mpi - use the parallel matrix data structures even on one processor
1770                (defaults to using SeqSBAIJ format on one processor)
1771 
1772    Notes:
1773    The number of rows and columns must be divisible by blocksize.
1774 
1775    The user MUST specify either the local or global matrix dimensions
1776    (possibly both).
1777 
1778    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1779    then it must be used on all processors that share the object for that argument.
1780 
1781    If the *_nnz parameter is given then the *_nz parameter is ignored
1782 
1783    Storage Information:
1784    For a square global matrix we define each processor's diagonal portion
1785    to be its local rows and the corresponding columns (a square submatrix);
1786    each processor's off-diagonal portion encompasses the remainder of the
1787    local matrix (a rectangular submatrix).
1788 
1789    The user can specify preallocated storage for the diagonal part of
1790    the local submatrix with either d_nz or d_nnz (not both).  Set
1791    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1792    memory allocation.  Likewise, specify preallocated storage for the
1793    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1794 
1795    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1796    the figure below we depict these three local rows and all columns (0-11).
1797 
1798 .vb
1799            0 1 2 3 4 5 6 7 8 9 10 11
1800           --------------------------
1801    row 3  |  o o o d d d o o o o o o
1802    row 4  |  o o o d d d o o o o o o
1803    row 5  |  o o o d d d o o o o o o
1804           --------------------------
1805 .ve
1806 
1807    Thus, any entries in the d locations are stored in the d (diagonal)
1808    submatrix, and any entries in the o locations are stored in the
1809    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1810    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1811 
1812    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1813    plus the diagonal part of the d matrix,
1814    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1815    In general, for PDE problems in which most nonzeros are near the diagonal,
1816    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1817    or you will get TERRIBLE performance; see the users' manual chapter on
1818    matrices.
1819 
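        Example usage (a minimal sketch; the block size, global sizes, and
        preallocation values are illustrative assumptions). Set only the upper
        triangular entries with MatSetValues() before assembling:
     .vb
        Mat A;
        ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                                 5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
        ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     .ve
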
1820    Level: intermediate
1821 
1822 .keywords: matrix, block, aij, compressed row, sparse, parallel
1823 
1824 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1825 @*/
1826 
1827 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1828 {
1829   PetscErrorCode ierr;
1830   PetscMPIInt    size;
1831 
1832   PetscFunctionBegin;
1833   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1834   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1835   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1836   if (size > 1) {
1837     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1838     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1839   } else {
1840     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1841     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1842   }
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1849 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1850 {
1851   Mat            mat;
1852   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1853   PetscErrorCode ierr;
1854   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1855   PetscScalar    *array;
1856 
1857   PetscFunctionBegin;
1858   *newmat       = 0;
1859   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1860   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1861   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1862   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1863 
1864   mat->factor       = matin->factor;
1865   mat->preallocated = PETSC_TRUE;
1866   mat->assembled    = PETSC_TRUE;
1867   mat->insertmode   = NOT_SET_VALUES;
1868 
1869   a = (Mat_MPISBAIJ*)mat->data;
1870   mat->bs  = matin->bs;
1871   a->bs2   = oldmat->bs2;
1872   a->mbs   = oldmat->mbs;
1873   a->nbs   = oldmat->nbs;
1874   a->Mbs   = oldmat->Mbs;
1875   a->Nbs   = oldmat->Nbs;
1876 
1877   a->rstart       = oldmat->rstart;
1878   a->rend         = oldmat->rend;
1879   a->cstart       = oldmat->cstart;
1880   a->cend         = oldmat->cend;
1881   a->size         = oldmat->size;
1882   a->rank         = oldmat->rank;
1883   a->donotstash   = oldmat->donotstash;
1884   a->roworiented  = oldmat->roworiented;
1885   a->rowindices   = 0;
1886   a->rowvalues    = 0;
1887   a->getrowactive = PETSC_FALSE;
1888   a->barray       = 0;
1889   a->rstart_bs    = oldmat->rstart_bs;
1890   a->rend_bs      = oldmat->rend_bs;
1891   a->cstart_bs    = oldmat->cstart_bs;
1892   a->cend_bs      = oldmat->cend_bs;
1893 
1894   /* hash table stuff */
1895   a->ht           = 0;
1896   a->hd           = 0;
1897   a->ht_size      = 0;
1898   a->ht_flag      = oldmat->ht_flag;
1899   a->ht_fact      = oldmat->ht_fact;
1900   a->ht_total_ct  = 0;
1901   a->ht_insert_ct = 0;
1902 
1903   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1904   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1905   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1906   if (oldmat->colmap) {
1907 #if defined (PETSC_USE_CTABLE)
1908     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1909 #else
1910     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1911     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1912     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1913 #endif
1914   } else a->colmap = 0;
1915 
1916   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1917     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1918     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1919     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
1920   } else a->garray = 0;
1921 
1922   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1923   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1924   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1925   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1926 
1927   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
1928   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1929   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
1930   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1931 
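       /* slvec1 holds the bs*mbs local entries followed by the ghost entries; wrap
          each segment as a sequential vector sharing slvec1's storage, and likewise
          the ghost segment of slvec0, mirroring the layout created in
          MatSetUpMultiply_MPISBAIJ. */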
1932   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
1933   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
1934   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
1935   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
1936   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
1937   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
1938   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
1939   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
1940   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1941   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1942   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
1943   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
1944   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
1945 
1946   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
1947   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
1948   a->sMvctx = oldmat->sMvctx;
1949   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
1950 
1951   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1952   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1953   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1954   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1955   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1956   *newmat = mat;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #include "petscsys.h"
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatLoad_MPISBAIJ"
1964 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1965 {
1966   Mat            A;
1967   PetscErrorCode ierr;
1968   PetscInt       i,nz,j,rstart,rend;
1969   PetscScalar    *vals,*buf;
1970   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1971   MPI_Status     status;
1972   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
1973   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1974   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
1975   PetscInt       bs=1,Mbs,mbs,extra_rows;
1976   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
1977   PetscInt       dcount,kmax,k,nzcount,tmp;
1978   int            fd;
1979 
1980   PetscFunctionBegin;
1981   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1982 
1983   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1984   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1985   if (!rank) {
1986     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1987     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1988     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1989     if (header[3] < 0) {
1990       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
1991     }
1992   }
1993 
1994   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1995   M = header[1]; N = header[2];
1996 
1997   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1998 
1999   /*
2000      This code adds extra rows to make sure the number of rows is
2001      divisible by the blocksize
2002   */
2003   Mbs        = M/bs;
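       /* For example (an illustrative case, not from the original source): M=10 with
          bs=4 gives Mbs=2 and extra_rows = 4-10+8 = 2, so Mbs is bumped to 3 and two
          padded rows (with a unit diagonal, see below) are appended. */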
2004   extra_rows = bs - M + bs*(Mbs);
2005   if (extra_rows == bs) extra_rows = 0;
2006   else                  Mbs++;
2007   if (extra_rows && !rank) {
2008     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2009   }
2010 
2011   /* determine ownership of all rows */
2012   mbs        = Mbs/size + ((Mbs % size) > rank);
2013   m          = mbs*bs;
2014   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2015   browners   = rowners + size + 1;
2016   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2017   rowners[0] = 0;
2018   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2019   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2020   rstart = rowners[rank];
2021   rend   = rowners[rank+1];
2022 
2023   /* distribute row lengths to all processors */
2024   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2025   if (!rank) {
2026     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2027     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2028     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2029     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2030     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2031     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2032     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2033   } else {
2034     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2035   }
2036 
2037   if (!rank) {   /* procs[0] */
2038     /* calculate the number of nonzeros on each processor */
2039     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2040     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2041     for (i=0; i<size; i++) {
2042       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2043         procsnz[i] += rowlengths[j];
2044       }
2045     }
2046     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2047 
2048     /* determine max buffer needed and allocate it */
2049     maxnz = 0;
2050     for (i=0; i<size; i++) {
2051       maxnz = PetscMax(maxnz,procsnz[i]);
2052     }
2053     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2054 
2055     /* read in my part of the matrix column indices  */
2056     nz     = procsnz[0];
2057     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2058     mycols = ibuf;
2059     if (size == 1)  nz -= extra_rows;
2060     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2061     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2062 
2063     /* read in the column indices of all other procs (except the last) and ship them off */
2064     for (i=1; i<size-1; i++) {
2065       nz   = procsnz[i];
2066       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2067       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2068     }
2069     /* read in the stuff for the last proc */
2070     if (size != 1) {
2071       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2072       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2073       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2074       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2075     }
2076     ierr = PetscFree(cols);CHKERRQ(ierr);
2077   } else {  /* procs[i], i>0 */
2078     /* determine buffer space needed for message */
2079     nz = 0;
2080     for (i=0; i<m; i++) {
2081       nz += locrowlens[i];
2082     }
2083     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2084     mycols = ibuf;
2085     /* receive message of column indices*/
2086     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2087     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2088     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"number of column indices received does not match the local row lengths");
2089   }
2090 
2091   /* loop over local rows, determining number of off diagonal entries */
2092   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2093   odlens   = dlens + (rend-rstart);
2094   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2095   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2096   masked1  = mask    + Mbs;
2097   masked2  = masked1 + Mbs;
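       /* mask[] flags the block columns already counted for the current block row;
          masked1[]/masked2[] record which flags were set so that only those entries
          are cleared afterwards, instead of rezeroing all Mbs flags per block row. */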
2098   rowcount = 0; nzcount = 0;
2099   for (i=0; i<mbs; i++) {
2100     dcount  = 0;
2101     odcount = 0;
2102     for (j=0; j<bs; j++) {
2103       kmax = locrowlens[rowcount];
2104       for (k=0; k<kmax; k++) {
2105         tmp = mycols[nzcount++]/bs; /* block col. index */
2106         if (!mask[tmp]) {
2107           mask[tmp] = 1;
2108           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2109           else masked1[dcount++] = tmp; /* entry in diag portion */
2110         }
2111       }
2112       rowcount++;
2113     }
2114 
2115     dlens[i]  = dcount;  /* d_nzz[i] */
2116     odlens[i] = odcount; /* o_nzz[i] */
2117 
2118     /* zero out the mask elements we set */
2119     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2120     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2121   }
2122 
2123   /* create our matrix */
2124   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2125   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2126   ierr = MatSetType(A,type);CHKERRQ(ierr);
2127   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2128   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2129 
2130   if (!rank) {
2131     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2132     /* read in my part of the matrix numerical values  */
2133     nz = procsnz[0];
2134     vals = buf;
2135     mycols = ibuf;
2136     if (size == 1)  nz -= extra_rows;
2137     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2138     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2139 
2140     /* insert into matrix */
2141     jj      = rstart*bs;
2142     for (i=0; i<m; i++) {
2143       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2144       mycols += locrowlens[i];
2145       vals   += locrowlens[i];
2146       jj++;
2147     }
2148 
2149     /* read in other processors (except the last one) and ship out */
2150     for (i=1; i<size-1; i++) {
2151       nz   = procsnz[i];
2152       vals = buf;
2153       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2154       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2155     }
2156     /* the last proc */
2157     if (size != 1){
2158       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2159       vals = buf;
2160       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2161       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2162       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2163     }
2164     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2165 
2166   } else {
2167     /* receive numeric values */
2168     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2169 
2170     /* receive message of values*/
2171     vals   = buf;
2172     mycols = ibuf;
2173     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2174     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2175     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"number of values received does not match the local row lengths");
2176 
2177     /* insert into matrix */
2178     jj      = rstart*bs;
2179     for (i=0; i<m; i++) {
2180       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2181       mycols += locrowlens[i];
2182       vals   += locrowlens[i];
2183       jj++;
2184     }
2185   }
2186 
2187   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2188   ierr = PetscFree(buf);CHKERRQ(ierr);
2189   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2190   ierr = PetscFree(rowners);CHKERRQ(ierr);
2191   ierr = PetscFree(dlens);CHKERRQ(ierr);
2192   ierr = PetscFree(mask);CHKERRQ(ierr);
2193   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2194   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2195   *newmat = A;
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2201 /*XXXXX@
2202    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2203 
2204    Input Parameters:
2205 .  mat  - the matrix
2206 .  fact - factor
2207 
2208    Collective on Mat
2209 
2210    Level: advanced
2211 
2212   Notes:
2213    This can also be set by the command line option: -mat_use_hash_table fact
2214 
2215 .keywords: matrix, hashtable, factor, HT
2216 
2217 .seealso: MatSetOption()
2218 @XXXXX*/
2219 
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2223 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2224 {
2225   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2226   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2227   PetscReal      atmp;
2228   PetscReal      *work,*svalues,*rvalues;
2229   PetscErrorCode ierr;
2230   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2231   PetscMPIInt    rank,size;
2232   PetscInt       *rowners_bs,dest,count,source;
2233   PetscScalar    *va;
2234   MatScalar      *ba;
2235   MPI_Status     stat;
2236 
2237   PetscFunctionBegin;
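       /* Only the upper triangular blocks are stored: MatGetRowMax() on the diagonal
          block handles the local part; each stored off-diagonal entry (i,j) updates
          the maximum of local row i directly, while its implicit transpose (j,i) is
          accumulated in work[] and shipped to the process that owns row j. */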
2238   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2239   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2240 
2241   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2242   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2243 
2244   bs   = A->bs;
2245   mbs  = a->mbs;
2246   Mbs  = a->Mbs;
2247   ba   = b->a;
2248   bi   = b->i;
2249   bj   = b->j;
2250 
2251   /* find ownerships */
2252   rowners_bs = a->rowners_bs;
2253 
2254   /* each proc creates an array to be distributed */
2255   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2256   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2257 
2258   /* row_max for B */
2259   if (rank != size-1){
2260     for (i=0; i<mbs; i++) {
2261       ncols = bi[1] - bi[0]; bi++;
2262       brow  = bs*i;
2263       for (j=0; j<ncols; j++){
2264         bcol = bs*(*bj);
2265         for (kcol=0; kcol<bs; kcol++){
2266           col = bcol + kcol;                 /* local col index */
2267           col += rowners_bs[rank+1];      /* global col index */
2268           for (krow=0; krow<bs; krow++){
2269             atmp = PetscAbsScalar(*ba); ba++;
2270             row = brow + krow;    /* local row index */
2271             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2272             if (work[col] < atmp) work[col] = atmp;
2273           }
2274         }
2275         bj++;
2276       }
2277     }
2278 
2279     /* send values to its owners */
2280     for (dest=rank+1; dest<size; dest++){
2281       svalues = work + rowners_bs[dest];
2282       count   = rowners_bs[dest+1]-rowners_bs[dest];
2283       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2284     }
2285   }
2286 
2287   /* receive values */
2288   if (rank){
2289     rvalues = work;
2290     count   = rowners_bs[rank+1]-rowners_bs[rank];
2291     for (source=0; source<rank; source++){
2292       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2293       /* process values */
2294       for (i=0; i<count; i++){
2295         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2296       }
2297     }
2298   }
2299 
2300   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2301   ierr = PetscFree(work);CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 #undef __FUNCT__
2306 #define __FUNCT__ "MatRelax_MPISBAIJ"
2307 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2308 {
2309   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2310   PetscErrorCode ierr;
2311   PetscInt       mbs=mat->mbs,bs=matin->bs;
2312   PetscScalar    *x,*b,*ptr,zero=0.0;
2313   Vec            bb1;
2314 
2315   PetscFunctionBegin;
2316   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2317   if (bs > 1)
2318     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2319 
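       /* Sketch of the sweep implemented below (a reading of the code, stated as an
          interpretation): B holds the stored upper triangular off-diagonal blocks, so
          each iteration forms bb1 = bb - B^T*xx - B*xx; the B^T and B contributions
          travel together by packing [xx; B^T*xx] into slvec0, negating it, and doing
          a single scatter through sMvctx, after which a local symmetric SOR sweep
          with the diagonal block mat->A updates xx. */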
2320   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2321     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2322       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2323       its--;
2324     }
2325 
2326     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2327     while (its--){
2328 
2329       /* lower triangular part: slvec0b = - B^T*xx */
2330       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2331 
2332       /* copy xx into slvec0a */
2333       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2334       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2335       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2336       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2337 
2338       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2339 
2340       /* copy bb into slvec1a */
2341       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2342       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2343       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2344       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2345 
2346       /* set slvec1b = 0 */
2347       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2348 
2349       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2350       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2351       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2352       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2353 
2354       /* upper triangular part: bb1 = bb1 - B*x */
2355       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2356 
2357       /* local diagonal sweep */
2358       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2359     }
2360     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2361   } else {
2362     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2363   }
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
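     /* Two-communication variant of MatRelax_MPISBAIJ: it uses the general Mvctx
        scatter twice (forward for xx, reverse for the B^T contribution) instead of
        the packed sMvctx scatter. It is not referenced in MatOps_Values above. */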
2369 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2370 {
2371   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2372   PetscErrorCode ierr;
2373   Vec            lvec1,bb1;
2374 
2375   PetscFunctionBegin;
2376   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2377   if (matin->bs > 1)
2378     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2379 
2380   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2381     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2382       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2383       its--;
2384     }
2385 
2386     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2387     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2388     while (its--){
2389       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2390 
2391       /* lower diagonal part: bb1 = bb - B^T*xx */
2392       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2393       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2394 
2395       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2396       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2397       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2398 
2399       /* upper diagonal part: bb1 = bb1 - B*x */
2400       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2401       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2402 
2403       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2404 
2405       /* diagonal sweep */
2406       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2407     }
2408     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2409     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2410   } else {
2411     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2412   }
2413   PetscFunctionReturn(0);
2414 }
2415 
2416