xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision e2df7a95c5ea77c899beea10ff9effd6061e7c8f)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the separate ..._MatScalar() functions do not exist;
26    the #defines below simply map them onto the usual ..._MPISBAIJ() routines. Otherwise
27    the ..._MatScalar() functions take matrix elements already in single precision and
28    insert them into the single precision data structures, while MatSetValues_MPISBAIJ()
29    and friends first convert the (PetscScalar) entries and then call ..._MatScalar().
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
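/* Added annotation (illustrative, not part of the original source): with
   PETSC_USE_MAT_SINGLE the MatSetValues_MPISBAIJ() wrapper defined below copies the
   caller's PetscScalar (double precision) values into a reusable MatScalar (single
   precision) buffer and forwards to the ..._MatScalar() worker, roughly

       for (i=0; i<N; i++) vsingle[i] = v[i];    implicit double-to-float conversion
       ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);

   Without PETSC_USE_MAT_SINGLE the #defines above make wrapper and worker the same
   function, so no copy is ever performed. */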
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
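/* Added annotation: within each bs x bs block the values are stored column-major, so
   scalar entry (ridx,cidx) of the _i-th block in a block row lives at
   ap[bs2*_i + bs*cidx + ridx].  For example, with bs=2 the scalar entry (row,col)=(3,4)
   has brow=1, bcol=2, ridx=1, cidx=0; once the binary search above has located block
   column bcol at position _i, the value is stored at offset bs2*_i + 1. */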
116 #ifndef MatSetValues_SeqSBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by the user is ignored.
207 */
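/* Usage sketch (added annotation, not in the original source): since lower-triangular
   entries are silently dropped, callers typically generate only the upper triangle,
   e.g. for a tridiagonal matrix of global size N (declarations and checks omitted):

       ierr = MatGetOwnershipRange(mat,&Istart,&Iend);CHKERRQ(ierr);
       for (i=Istart; i<Iend; i++) {
         ierr = MatSetValues(mat,1,&i,1,&i,&diag,INSERT_VALUES);CHKERRQ(ierr);
         if (i < N-1) {
           j = i + 1;
           ierr = MatSetValues(mat,1,&i,1,&j,&offdiag,INSERT_VALUES);CHKERRQ(ierr);
         }
       }
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/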
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=baij->rstart_bs;
218   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
219   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
220 
221   /* Some Variables required in the macro */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc=0;
238   MatScalar     *v_loc=0;
239 
240   PetscFunctionBegin;
241 
242   if(!baij->donotstash){
243     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
244     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
245   }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
256         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
257           col = in[j] - cstart_orig;          /* local col index */
258           brow = row/bs; bcol = col/bs;
259           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
260           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         } else if (in[j] < 0) continue;
264 #if defined(PETSC_USE_DEBUG)
265         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
266 #endif
267         else {  /* off-diag entry (B) */
268           if (mat->was_assembled) {
269             if (!baij->colmap) {
270               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
271             }
272 #if defined (PETSC_USE_CTABLE)
273             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
274             col  = col - 1;
275 #else
276             col = baij->colmap[in[j]/bs] - 1;
277 #endif
278             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
279               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
280               col =  in[j];
281               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
282               B = baij->B;
283               b = (Mat_SeqBAIJ*)(B)->data;
284               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
285               ba=b->a;
286             } else col += in[j]%bs;
287           } else col = in[j];
288           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
289           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
290           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
291         }
292       }
293     } else {  /* off processor entry */
294       if (!baij->donotstash) {
295         n_loc = 0;
296         for (j=0; j<n; j++){
297           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
298           in_loc[n_loc] = in[j];
299           if (roworiented) {
300             v_loc[n_loc] = v[i*n+j];
301           } else {
302             v_loc[n_loc] = v[j*m+i];
303           }
304           n_loc++;
305         }
306         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
307       }
308     }
309   }
310 
311   if(!baij->donotstash){
312     ierr = PetscFree(in_loc);CHKERRQ(ierr);
313     ierr = PetscFree(v_loc);CHKERRQ(ierr);
314   }
315   PetscFunctionReturn(0);
316 }
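/* Added annotation: rows owned by another process are not inserted above; their upper
   triangular entries are filtered into in_loc/v_loc and placed in mat->stash, from which
   MatAssemblyBegin/End_MPISBAIJ() later ships them to the owning process. */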
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
321 {
322   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
323   const MatScalar *value;
324   MatScalar       *barray=baij->barray;
325   PetscTruth      roworiented = baij->roworiented;
326   PetscErrorCode  ierr;
327   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
328   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
329   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
330 
331   PetscFunctionBegin;
332   if(!barray) {
333     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
334     baij->barray = barray;
335   }
336 
337   if (roworiented) {
338     stepval = (n-1)*bs;
339   } else {
340     stepval = (m-1)*bs;
341   }
342   for (i=0; i<m; i++) {
343     if (im[i] < 0) continue;
344 #if defined(PETSC_USE_DEBUG)
345     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
346 #endif
347     if (im[i] >= rstart && im[i] < rend) {
348       row = im[i] - rstart;
349       for (j=0; j<n; j++) {
350         /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
351         if ((roworiented) && (n == 1)) {
352           barray = (MatScalar*) v + i*bs2;
353         } else if((!roworiented) && (m == 1)) {
354           barray = (MatScalar*) v + j*bs2;
355         } else { /* Here a copy is required */
356           if (roworiented) {
357             value = v + i*(stepval+bs)*bs + j*bs;
358           } else {
359             value = v + j*(stepval+bs)*bs + i*bs;
360           }
361           for (ii=0; ii<bs; ii++,value+=stepval) {
362             for (jj=0; jj<bs; jj++) {
363               *barray++  = *value++;
364             }
365           }
366           barray -=bs2;
367         }
368 
369         if (in[j] >= cstart && in[j] < cend){
370           col  = in[j] - cstart;
371           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
372         }
373         else if (in[j] < 0) continue;
374 #if defined(PETSC_USE_DEBUG)
375         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
376 #endif
377         else {
378           if (mat->was_assembled) {
379             if (!baij->colmap) {
380               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381             }
382 
383 #if defined(PETSC_USE_DEBUG)
384 #if defined (PETSC_USE_CTABLE)
385             { PetscInt data;
386               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
387               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
388             }
389 #else
390             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
391 #endif
392 #endif
393 #if defined (PETSC_USE_CTABLE)
394 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
395             col  = (col - 1)/bs;
396 #else
397             col = (baij->colmap[in[j]] - 1)/bs;
398 #endif
399             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
400               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
401               col =  in[j];
402             }
403           }
404           else col = in[j];
405           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
406         }
407       }
408     } else {
409       if (!baij->donotstash) {
410         if (roworiented) {
411           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         } else {
413           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
414         }
415       }
416     }
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatGetValues_MPISBAIJ"
423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
424 {
425   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
426   PetscErrorCode ierr;
427   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
428   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
429 
430   PetscFunctionBegin;
431   for (i=0; i<m; i++) {
432     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
433     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
434     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
435       row = idxm[i] - bsrstart;
436       for (j=0; j<n; j++) {
437         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
438         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
439         if (idxn[j] >= bscstart && idxn[j] < bscend){
440           col = idxn[j] - bscstart;
441           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442         } else {
443           if (!baij->colmap) {
444             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
445           }
446 #if defined (PETSC_USE_CTABLE)
447           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
448           data --;
449 #else
450           data = baij->colmap[idxn[j]/bs]-1;
451 #endif
452           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
453           else {
454             col  = data + idxn[j]%bs;
455             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
456           }
457         }
458       }
459     } else {
460       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
461     }
462   }
463   PetscFunctionReturn(0);
464 }
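/* Usage sketch (added annotation): only locally owned rows may be queried, so a caller
   first restricts itself to the ownership range, e.g.

       ierr = MatGetOwnershipRange(mat,&Istart,&Iend);CHKERRQ(ierr);
       row = Istart; col = Istart;
       ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);

   Requesting a row outside [Istart,Iend) raises the PETSC_ERR_SUP error above. */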
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatNorm_MPISBAIJ"
468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
469 {
470   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
471   PetscErrorCode ierr;
472   PetscReal      sum[2],*lnorm2;
473 
474   PetscFunctionBegin;
475   if (baij->size == 1) {
476     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
477   } else {
478     if (type == NORM_FROBENIUS) {
479       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
480       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
481       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
482       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
483       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
484       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
485       *norm = sqrt(sum[0] + 2*sum[1]);
486       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
487     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
488       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
489       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
490       PetscReal    *rsum,*rsum2,vabs;
491       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
492       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
493       MatScalar    *v;
494 
495       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
496       rsum2 = rsum + mat->N;
497       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
498       /* Amat */
499       v = amat->a; jj = amat->j;
500       for (brow=0; brow<mbs; brow++) {
501         grow = bs*(rstart + brow);
502         nz = amat->i[brow+1] - amat->i[brow];
503         for (bcol=0; bcol<nz; bcol++){
504           gcol = bs*(rstart + *jj); jj++;
505           for (col=0; col<bs; col++){
506             for (row=0; row<bs; row++){
507               vabs = PetscAbsScalar(*v); v++;
508               rsum[gcol+col] += vabs;
509               /* non-diagonal block */
510               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
511             }
512           }
513         }
514       }
515       /* Bmat */
516       v = bmat->a; jj = bmat->j;
517       for (brow=0; brow<mbs; brow++) {
518         grow = bs*(rstart + brow);
519         nz = bmat->i[brow+1] - bmat->i[brow];
520         for (bcol=0; bcol<nz; bcol++){
521           gcol = bs*garray[*jj]; jj++;
522           for (col=0; col<bs; col++){
523             for (row=0; row<bs; row++){
524               vabs = PetscAbsScalar(*v); v++;
525               rsum[gcol+col] += vabs;
526               rsum[grow+row] += vabs;
527             }
528           }
529         }
530       }
531       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
532       *norm = 0.0;
533       for (col=0; col<mat->N; col++) {
534         if (rsum2[col] > *norm) *norm = rsum2[col];
535       }
536       ierr = PetscFree(rsum);CHKERRQ(ierr);
537     } else {
538       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
539     }
540   }
541   PetscFunctionReturn(0);
542 }
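/* Added annotation: for NORM_FROBENIUS the squared local norms are summed across all
   processes and the off-diagonal block B is counted twice, since its mirror image below
   the diagonal is not stored:

       ||mat||_F = sqrt( sum ||A_d||_F^2 + 2 * sum ||B||_F^2 )

   where A_d is the symmetric diagonal block (whose SeqSBAIJ norm is assumed to already
   account for the entries it stores only once). */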
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
547 {
548   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
549   PetscErrorCode ierr;
550   PetscInt       nstash,reallocs;
551   InsertMode     addv;
552 
553   PetscFunctionBegin;
554   if (baij->donotstash) {
555     PetscFunctionReturn(0);
556   }
557 
558   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
559   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
560   if (addv == (ADD_VALUES|INSERT_VALUES)) {
561     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
562   }
563   mat->insertmode = addv; /* in case this processor had no cache */
564 
565   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
566   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
567   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
568   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
569   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
570   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
577 {
578   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
579   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
580   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
581   PetscErrorCode ierr;
582   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
583   PetscInt       *row,*col,other_disassembled;
584   PetscMPIInt    n;
585   PetscTruth     r1,r2,r3;
586   MatScalar      *val;
587   InsertMode     addv = mat->insertmode;
588 
589   PetscFunctionBegin;
590 
591   if (!baij->donotstash) {
592     while (1) {
593       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
594       if (!flg) break;
595 
596       for (i=0; i<n;) {
597         /* Now identify the consecutive vals belonging to the same row */
598         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
599         if (j < n) ncols = j-i;
600         else       ncols = n-i;
601         /* Now assemble all these values with a single function call */
602         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
603         i = j;
604       }
605     }
606     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
607     /* Now process the block-stash. Since the values are stashed column-oriented,
608        clear the roworiented flags (i.e. insert column-oriented) and restore the
609        original flags after MatSetValues() */
610     r1 = baij->roworiented;
611     r2 = a->roworiented;
612     r3 = b->roworiented;
613     baij->roworiented = PETSC_FALSE;
614     a->roworiented    = PETSC_FALSE;
615     b->roworiented    = PETSC_FALSE;
616     while (1) {
617       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
618       if (!flg) break;
619 
620       for (i=0; i<n;) {
621         /* Now identify the consecutive vals belonging to the same row */
622         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
623         if (j < n) ncols = j-i;
624         else       ncols = n-i;
625         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
626         i = j;
627       }
628     }
629     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
630     baij->roworiented = r1;
631     a->roworiented    = r2;
632     b->roworiented    = r3;
633   }
634 
635   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
636   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
637 
638   /* determine if any processor has disassembled, if so we must
639      also disassemble ourselves, in order that we may reassemble. */
640   /*
641      if nonzero structure of submatrix B cannot change then we know that
642      no processor disassembled thus we can skip this stuff
643   */
644   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
645     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
646     if (mat->was_assembled && !other_disassembled) {
647       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
648     }
649   }
650 
651   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
652     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
653   }
654   b->compressedrow.use = PETSC_TRUE;
655   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
656   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
657 
658   if (baij->rowvalues) {
659     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
660     baij->rowvalues = 0;
661   }
662 
663   PetscFunctionReturn(0);
664 }
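/* Usage sketch (added annotation): assembly is always the two-phase sequence

       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Begin() only launches the stash communication, so the caller may overlap other work;
   End() drains the scalar and block stashes (the two loops above), assembles the A and
   B parts, and on the first final assembly builds the scatter contexts through
   MatSetUpMultiply_MPISBAIJ(). */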
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
669 {
670   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode    ierr;
672   PetscInt          bs = mat->bs;
673   PetscMPIInt       size = baij->size,rank = baij->rank;
674   PetscTruth        iascii,isdraw;
675   PetscViewer       sviewer;
676   PetscViewerFormat format;
677 
678   PetscFunctionBegin;
679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
681   if (iascii) {
682     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
683     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684       MatInfo info;
685       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
686       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
687       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
688               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
689               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
690       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
691       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
692       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
694       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
695       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
696       PetscFunctionReturn(0);
697     } else if (format == PETSC_VIEWER_ASCII_INFO) {
698       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     }
701   }
702 
703   if (isdraw) {
704     PetscDraw       draw;
705     PetscTruth isnull;
706     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
707     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
708   }
709 
710   if (size == 1) {
711     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
712     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
713   } else {
714     /* assemble the entire matrix onto first processor. */
715     Mat         A;
716     Mat_SeqSBAIJ *Aloc;
717     Mat_SeqBAIJ *Bloc;
718     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
719     MatScalar   *a;
720 
721     /* Should this be the same type as mat? */
722     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
723     if (!rank) {
724       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
725     } else {
726       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
727     }
728     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
729     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
730     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
731 
732     /* copy over the A part */
733     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
734     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
735     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
736 
737     for (i=0; i<mbs; i++) {
738       rvals[0] = bs*(baij->rstart + i);
739       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
740       for (j=ai[i]; j<ai[i+1]; j++) {
741         col = (baij->cstart+aj[j])*bs;
742         for (k=0; k<bs; k++) {
743           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
744           col++; a += bs;
745         }
746       }
747     }
748     /* copy over the B part */
749     Bloc = (Mat_SeqBAIJ*)baij->B->data;
750     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
751     for (i=0; i<mbs; i++) {
752       rvals[0] = bs*(baij->rstart + i);
753       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
754       for (j=ai[i]; j<ai[i+1]; j++) {
755         col = baij->garray[aj[j]]*bs;
756         for (k=0; k<bs; k++) {
757           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
758           col++; a += bs;
759         }
760       }
761     }
762     ierr = PetscFree(rvals);CHKERRQ(ierr);
763     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
764     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
765     /*
766        Everyone has to call to draw the matrix since the graphics waits are
767        synchronized across all processors that share the PetscDraw object
768     */
769     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
770     if (!rank) {
771       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
772       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
773     }
774     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
775     ierr = MatDestroy(A);CHKERRQ(ierr);
776   }
777   PetscFunctionReturn(0);
778 }
779 
780 #undef __FUNCT__
781 #define __FUNCT__ "MatView_MPISBAIJ"
782 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
783 {
784   PetscErrorCode ierr;
785   PetscTruth     iascii,isdraw,issocket,isbinary;
786 
787   PetscFunctionBegin;
788   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
789   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
790   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
791   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
792   if (iascii || isdraw || issocket || isbinary) {
793     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
794   } else {
795     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
796   }
797   PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "MatDestroy_MPISBAIJ"
802 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
803 {
804   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
805   PetscErrorCode ierr;
806 
807   PetscFunctionBegin;
808 #if defined(PETSC_USE_LOG)
809   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
810 #endif
811   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
812   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
813   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
814   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
815   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
816 #if defined (PETSC_USE_CTABLE)
817   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
818 #else
819   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
820 #endif
821   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
822   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
823   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
824   if (baij->slvec0) {
825     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
826     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
827   }
828   if (baij->slvec1) {
829     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
830     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
831     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
832   }
833   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
834   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
835   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
836   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
837 #if defined(PETSC_USE_MAT_SINGLE)
838   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
839 #endif
840   ierr = PetscFree(baij);CHKERRQ(ierr);
841 
842   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
843   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
844   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
845   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatMult_MPISBAIJ"
851 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
852 {
853   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
854   PetscErrorCode ierr;
855   PetscInt       nt,mbs=a->mbs,bs=A->bs;
856   PetscScalar    *x,*from,zero=0.0;
857 
858   PetscFunctionBegin;
859   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
860   if (nt != A->n) {
861     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
862   }
863   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
864   if (nt != A->m) {
865     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
866   }
867 
868   /* diagonal part */
869   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
870   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
871 
872   /* subdiagonal part */
873   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
874 
875   /* copy x into the vec slvec0 */
876   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
877   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
878   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
879   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
880 
881   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
882   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
883   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
884 
885   /* superdiagonal part */
886   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
887 
888   PetscFunctionReturn(0);
889 }
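/* Added annotation: since only the upper triangle is stored, y = mat*x is assembled as

       y_local = A_d*x_local + B*x_ghost + (contributions of B^T*x_local from below)

   slvec0 packs [x_local ; B^T*x_local], and the single combined scatter into
   slvec1 = [A_d*x_local ; 0] with ADD_VALUES simultaneously delivers the ghost values
   of x and returns the subdiagonal B^T contributions to their owning processes, so one
   scatter replaces the forward-plus-reverse pair used by MatMult_MPISBAIJ_2comm()
   below. */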
890 
891 #undef __FUNCT__
892 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
893 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
894 {
895   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
896   PetscErrorCode ierr;
897   PetscInt       nt;
898 
899   PetscFunctionBegin;
900   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
901   if (nt != A->n) {
902     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
903   }
904   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
905   if (nt != A->m) {
906     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
907   }
908 
909   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
910   /* do diagonal part */
911   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
912   /* do superdiagonal part */
913   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
914   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
915   /* do subdiagonal part */
916   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
917   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
918   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
919 
920   PetscFunctionReturn(0);
921 }
922 
923 #undef __FUNCT__
924 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
925 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
926 {
927   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
928   PetscErrorCode ierr;
929   PetscInt       mbs=a->mbs,bs=A->bs;
930   PetscScalar    *x,*from,zero=0.0;
931 
932   PetscFunctionBegin;
933   /*
934   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
935   PetscSynchronizedFlush(A->comm);
936   */
937   /* diagonal part */
938   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
939   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
940 
941   /* subdiagonal part */
942   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
943 
944   /* copy x into the vec slvec0 */
945   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
946   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
947   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
948   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
949 
950   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
951   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
952   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
953 
954   /* superdiagonal part */
955   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
956 
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
962 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
963 {
964   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
969   /* do diagonal part */
970   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
971   /* do superdiagonal part */
972   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
973   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
974 
975   /* do subdiagonal part */
976   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
977   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
978   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
979 
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
985 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
986 {
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
996 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
997 {
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 /*
1006   This only works correctly for square matrices where the subblock A->A is the
1007    diagonal block
1008 */
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1011 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1012 {
1013   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1018   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatScale_MPISBAIJ"
1024 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1025 {
1026   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1027   PetscErrorCode ierr;
1028 
1029   PetscFunctionBegin;
1030   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1031   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 #undef __FUNCT__
1036 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1037 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1038 {
1039   PetscFunctionBegin;
1040   if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1046 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1047 {
1048   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1049 
1050   PetscFunctionBegin;
1051   if (!baij->getrowactive) {
1052     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1053   }
1054   baij->getrowactive = PETSC_FALSE;
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1060 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1061 {
1062   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1063   PetscErrorCode ierr;
1064 
1065   PetscFunctionBegin;
1066   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1067   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1073 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1074 {
1075   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1076   Mat            A = a->A,B = a->B;
1077   PetscErrorCode ierr;
1078   PetscReal      isend[5],irecv[5];
1079 
1080   PetscFunctionBegin;
1081   info->block_size     = (PetscReal)matin->bs;
1082   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1083   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1084   isend[3] = info->memory;  isend[4] = info->mallocs;
1085   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1086   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1087   isend[3] += info->memory;  isend[4] += info->mallocs;
1088   if (flag == MAT_LOCAL) {
1089     info->nz_used      = isend[0];
1090     info->nz_allocated = isend[1];
1091     info->nz_unneeded  = isend[2];
1092     info->memory       = isend[3];
1093     info->mallocs      = isend[4];
1094   } else if (flag == MAT_GLOBAL_MAX) {
1095     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1096     info->nz_used      = irecv[0];
1097     info->nz_allocated = irecv[1];
1098     info->nz_unneeded  = irecv[2];
1099     info->memory       = irecv[3];
1100     info->mallocs      = irecv[4];
1101   } else if (flag == MAT_GLOBAL_SUM) {
1102     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1103     info->nz_used      = irecv[0];
1104     info->nz_allocated = irecv[1];
1105     info->nz_unneeded  = irecv[2];
1106     info->memory       = irecv[3];
1107     info->mallocs      = irecv[4];
1108   } else {
1109     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1110   }
1111   info->rows_global       = (PetscReal)matin->M;
1112   info->columns_global    = (PetscReal)matin->N;
1113   info->rows_local        = (PetscReal)matin->m;
1114   info->columns_local     = (PetscReal)matin->n;
1115   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1116   info->fill_ratio_needed = 0;
1117   info->factor_mallocs    = 0;
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1123 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1124 {
1125   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1126   PetscErrorCode ierr;
1127 
1128   PetscFunctionBegin;
1129   switch (op) {
1130   case MAT_NO_NEW_NONZERO_LOCATIONS:
1131   case MAT_YES_NEW_NONZERO_LOCATIONS:
1132   case MAT_COLUMNS_UNSORTED:
1133   case MAT_COLUMNS_SORTED:
1134   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1135   case MAT_KEEP_ZEROED_ROWS:
1136   case MAT_NEW_NONZERO_LOCATION_ERR:
1137     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1138     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1139     break;
1140   case MAT_ROW_ORIENTED:
1141     a->roworiented = PETSC_TRUE;
1142     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1143     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1144     break;
1145   case MAT_ROWS_SORTED:
1146   case MAT_ROWS_UNSORTED:
1147   case MAT_YES_NEW_DIAGONALS:
1148     ierr = PetscLogInfo((A,"Info:MatSetOption_MPISBAIJ:Option ignored\n"));CHKERRQ(ierr);
1149     break;
1150   case MAT_COLUMN_ORIENTED:
1151     a->roworiented = PETSC_FALSE;
1152     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1153     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1154     break;
1155   case MAT_IGNORE_OFF_PROC_ENTRIES:
1156     a->donotstash = PETSC_TRUE;
1157     break;
1158   case MAT_NO_NEW_DIAGONALS:
1159     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1160   case MAT_USE_HASH_TABLE:
1161     a->ht_flag = PETSC_TRUE;
1162     break;
1163   case MAT_NOT_SYMMETRIC:
1164   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1165   case MAT_HERMITIAN:
1166     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1167   case MAT_SYMMETRIC:
1168   case MAT_STRUCTURALLY_SYMMETRIC:
1169   case MAT_NOT_HERMITIAN:
1170   case MAT_SYMMETRY_ETERNAL:
1171   case MAT_NOT_SYMMETRY_ETERNAL:
1172     break;
1173   default:
1174     SETERRQ(PETSC_ERR_SUP,"unknown option");
1175   }
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1181 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1182 {
1183   PetscErrorCode ierr;
1184   PetscFunctionBegin;
1185   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1191 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1192 {
1193   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1194   Mat            a=baij->A, b=baij->B;
1195   PetscErrorCode ierr;
1196   PetscInt       nv,m,n;
1197   PetscTruth     flg;
1198 
1199   PetscFunctionBegin;
1200   if (ll != rr){
1201     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1202     if (!flg)
1203       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1204   }
1205   if (!ll) PetscFunctionReturn(0);
1206 
1207   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1208   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1209 
1210   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1211   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1212 
1213   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1214 
1215   /* left diagonalscale the off-diagonal part */
1216   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1217 
1218   /* scale the diagonal part */
1219   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1220 
1221   /* right diagonalscale the off-diagonal part */
1222   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1223   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
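/* Added annotation: insisting on ll == rr keeps the scaled matrix D*mat*D symmetric, so
   the upper-triangular SBAIJ storage stays valid; b (the off-diagonal part) is scaled by
   ll on the left and by the scattered ghost entries of rr (baij->lvec) on the right. */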
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1229 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1230 {
1231   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1232   MPI_Comm          comm = A->comm;
1233   static PetscTruth called = PETSC_FALSE;
1234   PetscErrorCode    ierr;
1235 
1236   PetscFunctionBegin;
1237   if (!a->rank) {
1238     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1239   }
1240   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1241   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1242   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1248 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1249 {
1250   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1251   PetscErrorCode ierr;
1252 
1253   PetscFunctionBegin;
1254   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatEqual_MPISBAIJ"
1262 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1263 {
1264   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1265   Mat            a,b,c,d;
1266   PetscTruth     flg;
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270   a = matA->A; b = matA->B;
1271   c = matB->A; d = matB->B;
1272 
1273   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1274   if (flg) {
1275     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1276   }
1277   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1283 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1284 {
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1294 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1295 {
1296   PetscErrorCode ierr;
1297   PetscInt       i;
1298   PetscTruth     flg;
1299 
1300   PetscFunctionBegin;
1301   for (i=0; i<n; i++) {
1302     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1303     if (!flg) {
1304       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1305     }
1306   }
1307   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 
1312 /* -------------------------------------------------------------------*/
1313 static struct _MatOps MatOps_Values = {
1314        MatSetValues_MPISBAIJ,
1315        MatGetRow_MPISBAIJ,
1316        MatRestoreRow_MPISBAIJ,
1317        MatMult_MPISBAIJ,
1318 /* 4*/ MatMultAdd_MPISBAIJ,
1319        MatMultTranspose_MPISBAIJ,
1320        MatMultTransposeAdd_MPISBAIJ,
1321        0,
1322        0,
1323        0,
1324 /*10*/ 0,
1325        0,
1326        0,
1327        MatRelax_MPISBAIJ,
1328        MatTranspose_MPISBAIJ,
1329 /*15*/ MatGetInfo_MPISBAIJ,
1330        MatEqual_MPISBAIJ,
1331        MatGetDiagonal_MPISBAIJ,
1332        MatDiagonalScale_MPISBAIJ,
1333        MatNorm_MPISBAIJ,
1334 /*20*/ MatAssemblyBegin_MPISBAIJ,
1335        MatAssemblyEnd_MPISBAIJ,
1336        0,
1337        MatSetOption_MPISBAIJ,
1338        MatZeroEntries_MPISBAIJ,
1339 /*25*/ 0,
1340        0,
1341        0,
1342        0,
1343        0,
1344 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1345        0,
1346        0,
1347        0,
1348        0,
1349 /*35*/ MatDuplicate_MPISBAIJ,
1350        0,
1351        0,
1352        0,
1353        0,
1354 /*40*/ 0,
1355        MatGetSubMatrices_MPISBAIJ,
1356        MatIncreaseOverlap_MPISBAIJ,
1357        MatGetValues_MPISBAIJ,
1358        0,
1359 /*45*/ MatPrintHelp_MPISBAIJ,
1360        MatScale_MPISBAIJ,
1361        0,
1362        0,
1363        0,
1364 /*50*/ 0,
1365        0,
1366        0,
1367        0,
1368        0,
1369 /*55*/ 0,
1370        0,
1371        MatSetUnfactored_MPISBAIJ,
1372        0,
1373        MatSetValuesBlocked_MPISBAIJ,
1374 /*60*/ 0,
1375        0,
1376        0,
1377        MatGetPetscMaps_Petsc,
1378        0,
1379 /*65*/ 0,
1380        0,
1381        0,
1382        0,
1383        0,
1384 /*70*/ MatGetRowMax_MPISBAIJ,
1385        0,
1386        0,
1387        0,
1388        0,
1389 /*75*/ 0,
1390        0,
1391        0,
1392        0,
1393        0,
1394 /*80*/ 0,
1395        0,
1396        0,
1397        0,
1398        MatLoad_MPISBAIJ,
1399 /*85*/ 0,
1400        0,
1401        0,
1402        0,
1403        0,
1404 /*90*/ 0,
1405        0,
1406        0,
1407        0,
1408        0,
1409 /*95*/ 0,
1410        0,
1411        0,
1412        0};
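/* Added annotation: the slots of this table correspond, in order, to the MatOperation
   enumeration (the numbered markers are slot indices); a 0 entry marks an operation the
   MPISBAIJ format does not implement, for which the generic Mat interface raises an
   error. */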
1413 
1414 
1415 EXTERN_C_BEGIN
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1418 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1419 {
1420   PetscFunctionBegin;
1421   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1422   *iscopy = PETSC_FALSE;
1423   PetscFunctionReturn(0);
1424 }
1425 EXTERN_C_END
1426 
1427 EXTERN_C_BEGIN
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1430 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1431 {
1432   Mat_MPISBAIJ   *b;
1433   PetscErrorCode ierr;
1434   PetscInt       i,mbs,Mbs;
1435 
1436   PetscFunctionBegin;
1437   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1438 
1439   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1440   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1441   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1442   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1443   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1444   if (d_nnz) {
1445     for (i=0; i<B->m/bs; i++) {
1446       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1447     }
1448   }
1449   if (o_nnz) {
1450     for (i=0; i<B->m/bs; i++) {
1451       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1452     }
1453   }
1454   B->preallocated = PETSC_TRUE;
1455   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1456   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1457   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1458   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1459 
1460   b   = (Mat_MPISBAIJ*)B->data;
1461   mbs = B->m/bs;
1462   Mbs = B->M/bs;
1463   if (mbs*bs != B->m) {
1464     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by block size %D",B->m,bs);
1465   }
1466 
1467   B->bs  = bs;
1468   b->bs2 = bs*bs;
1469   b->mbs = mbs;
1470   b->nbs = mbs;
1471   b->Mbs = Mbs;
1472   b->Nbs = Mbs;
1473 
1474   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1475   b->rowners[0]    = 0;
1476   for (i=2; i<=b->size; i++) {
1477     b->rowners[i] += b->rowners[i-1];
1478   }
1479   b->rstart    = b->rowners[b->rank];
1480   b->rend      = b->rowners[b->rank+1];
1481   b->cstart    = b->rstart;
1482   b->cend      = b->rend;
1483   for (i=0; i<=b->size; i++) {
1484     b->rowners_bs[i] = b->rowners[i]*bs;
1485   }
1486   b->rstart_bs = b->rstart*bs;
1487   b->rend_bs   = b->rend*bs;
1488 
1489   b->cstart_bs = b->cstart*bs;
1490   b->cend_bs   = b->cend*bs;
1491 
1492   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1493   ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr);
1494   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1495   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1496   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1497 
1498   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1499   ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr);
1500   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1501   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1502   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1503 
1504   /* build cache for off-processor entries formed during assembly */
1505   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1506 
1507   PetscFunctionReturn(0);
1508 }
1509 EXTERN_C_END
1510 
1511 /*MC
1512    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1513    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1514 
1515    Options Database Keys:
1516 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1517 
1518   Level: beginner
1519 
1520 .seealso: MatCreateMPISBAIJ
1521 M*/
1522 
1523 EXTERN_C_BEGIN
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatCreate_MPISBAIJ"
1526 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1527 {
1528   Mat_MPISBAIJ   *b;
1529   PetscErrorCode ierr;
1530   PetscTruth     flg;
1531 
1532   PetscFunctionBegin;
1533 
1534   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1535   B->data = (void*)b;
1536   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1537 
1538   B->ops->destroy    = MatDestroy_MPISBAIJ;
1539   B->ops->view       = MatView_MPISBAIJ;
1540   B->mapping    = 0;
1541   B->factor     = 0;
1542   B->assembled  = PETSC_FALSE;
1543 
1544   B->insertmode = NOT_SET_VALUES;
1545   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1546   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1547 
1548   /* build local table of row and column ownerships */
1549   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1550   b->cowners    = b->rowners + b->size + 2;
1551   b->rowners_bs = b->cowners + b->size + 2;
1552   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1553 
1554   /* build cache for off-processor entries formed during assembly */
1555   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1556   b->donotstash  = PETSC_FALSE;
1557   b->colmap      = PETSC_NULL;
1558   b->garray      = PETSC_NULL;
1559   b->roworiented = PETSC_TRUE;
1560 
1561 #if defined(PETSC_USE_MAT_SINGLE)
1562   /* stuff for MatSetValues_XXX in single precision */
1563   b->setvalueslen     = 0;
1564   b->setvaluescopy    = PETSC_NULL;
1565 #endif
1566 
1567   /* stuff used in block assembly */
1568   b->barray       = 0;
1569 
1570   /* stuff used for matrix vector multiply */
1571   b->lvec         = 0;
1572   b->Mvctx        = 0;
1573   b->slvec0       = 0;
1574   b->slvec0b      = 0;
1575   b->slvec1       = 0;
1576   b->slvec1a      = 0;
1577   b->slvec1b      = 0;
1578   b->sMvctx       = 0;
1579 
1580   /* stuff for MatGetRow() */
1581   b->rowindices   = 0;
1582   b->rowvalues    = 0;
1583   b->getrowactive = PETSC_FALSE;
1584 
1585   /* hash table stuff */
1586   b->ht           = 0;
1587   b->hd           = 0;
1588   b->ht_size      = 0;
1589   b->ht_flag      = PETSC_FALSE;
1590   b->ht_fact      = 0;
1591   b->ht_total_ct  = 0;
1592   b->ht_insert_ct = 0;
1593 
1594   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1595   if (flg) {
1596     PetscReal fact = 1.39;
1597     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1598     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1599     if (fact <= 1.0) fact = 1.39;
1600     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1601     ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
1602   }
1603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1604                                      "MatStoreValues_MPISBAIJ",
1605                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1607                                      "MatRetrieveValues_MPISBAIJ",
1608                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1610                                      "MatGetDiagonalBlock_MPISBAIJ",
1611                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1613                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1614                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1615   B->symmetric                  = PETSC_TRUE;
1616   B->structurally_symmetric     = PETSC_TRUE;
1617   B->symmetric_set              = PETSC_TRUE;
1618   B->structurally_symmetric_set = PETSC_TRUE;
1619   PetscFunctionReturn(0);
1620 }
1621 EXTERN_C_END
1622 
1623 /*MC
1624    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1625 
1626    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1627    and MATMPISBAIJ otherwise.
1628 
1629    Options Database Keys:
1630 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1631 
1632   Level: beginner
1633 
1634 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1635 M*/
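/*
   A minimal runtime-selection sketch (hypothetical helper, illustrative
   sizes): with -mat_type sbaij on the command line, MatSetFromOptions()
   reaches MatCreate_SBAIJ() below, which dispatches to MATSEQSBAIJ on one
   process and MATMPISBAIJ otherwise.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleCreateSBAIJFromOptions"
static PetscErrorCode ExampleCreateSBAIJFromOptions(MPI_Comm comm,PetscInt m,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); /* square layout */
  ierr = MatSetType(*A,MATSBAIJ);CHKERRQ(ierr);  /* default; may be overridden */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);    /* honors -mat_type */
  PetscFunctionReturn(0);
}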
1636 
1637 EXTERN_C_BEGIN
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatCreate_SBAIJ"
1640 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1641 {
1642   PetscErrorCode ierr;
1643   PetscMPIInt    size;
1644 
1645   PetscFunctionBegin;
1646   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1647   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1648   if (size == 1) {
1649     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1650   } else {
1651     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1652   }
1653   PetscFunctionReturn(0);
1654 }
1655 EXTERN_C_END
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1659 /*@C
1660    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1661    the user should preallocate the matrix storage by setting the parameters
1662    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1663    performance can be increased by more than a factor of 50.
1664 
1665    Collective on Mat
1666 
1667    Input Parameters:
1668 +  A - the matrix
1669 .  bs   - size of block
1670 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1671            submatrix  (same for all local rows)
1672 .  d_nnz - array containing the number of block nonzeros in the various block rows
1673            in the upper triangular and diagonal part of the diagonal portion of the local
1674            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1675            for the diagonal entry even if it is zero.
1676 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1677            submatrix (same for all local rows).
1678 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1679            off-diagonal portion of the local submatrix (possibly different for
1680            each block row) or PETSC_NULL.
1681 
1682 
1683    Options Database Keys:
1684 +   -mat_no_unroll - uses code that does not unroll the loops in the
1685                      block calculations (much slower)
1686 -   -mat_block_size - size of the blocks to use
1687 
1688    Notes:
1689 
1690    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1691    then it must be used on all processors that share the object for that argument.
1692 
1693    If the *_nnz parameter is given then the *_nz parameter is ignored.
1694 
1695    Storage Information:
1696    For a square global matrix we define each processor's diagonal portion
1697    to be its local rows and the corresponding columns (a square submatrix);
1698    each processor's off-diagonal portion encompasses the remainder of the
1699    local matrix (a rectangular submatrix).
1700 
1701    The user can specify preallocated storage for the diagonal part of
1702    the local submatrix with either d_nz or d_nnz (not both).  Set
1703    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1704    memory allocation.  Likewise, specify preallocated storage for the
1705    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1706 
1707    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1708    the figure below we depict these three local rows and all columns (0-11).
1709 
1710 .vb
1711            0 1 2 3 4 5 6 7 8 9 10 11
1712           -------------------
1713    row 3  |  o o o d d d o o o o o o
1714    row 4  |  o o o d d d o o o o o o
1715    row 5  |  o o o d d d o o o o o o
1716           -------------------
1717 .ve
1718 
1719    Thus, any entries in the d locations are stored in the d (diagonal)
1720    submatrix, and any entries in the o locations are stored in the
1721    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1722    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1723 
1724    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1725    plus the diagonal part of the d matrix,
1726    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1727    In general, for PDE problems in which most nonzeros are near the diagonal,
1728    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1729    or you will get TERRIBLE performance; see the users' manual chapter on
1730    matrices.
1731 
1732    Level: intermediate
1733 
1734 .keywords: matrix, block, aij, compressed row, sparse, parallel
1735 
1736 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1737 @*/
1738 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1739 {
1740   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1741 
1742   PetscFunctionBegin;
1743   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1744   if (f) {
1745     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1746   }
1747   PetscFunctionReturn(0);
1748 }
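/*
   A minimal preallocation sketch (hypothetical helper, illustrative
   counts): for a block-tridiagonal structure each local block row has at
   most 2 blocks in the upper triangle of the diagonal portion (the
   diagonal block plus one superdiagonal block) and at most 1 block in the
   off-diagonal portion, so constant estimates suffice.  B is assumed to
   have been created with MatCreate()/MatSetSizes()/MatSetType(MATMPISBAIJ).
*/
#undef __FUNCT__
#define __FUNCT__ "ExamplePreallocateSBAIJ"
static PetscErrorCode ExamplePreallocateSBAIJ(Mat B,PetscInt bs)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* d_nz = 2, o_nz = 1; exact d_nnz/o_nnz arrays could replace PETSC_NULL */
  ierr = MatMPISBAIJSetPreallocation(B,bs,2,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}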
1749 
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatCreateMPISBAIJ"
1752 /*@C
1753    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1754    (block compressed row).  For good matrix assembly performance
1755    the user should preallocate the matrix storage by setting the parameters
1756    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1757    performance can be increased by more than a factor of 50.
1758 
1759    Collective on MPI_Comm
1760 
1761    Input Parameters:
1762 +  comm - MPI communicator
1763 .  bs   - size of block
1764 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1765            This value should be the same as the local size used in creating the
1766            y vector for the matrix-vector product y = Ax.
1767 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1768            This value should be the same as the local size used in creating the
1769            x vector for the matrix-vector product y = Ax.
1770 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1771 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1772 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1773            submatrix  (same for all local rows)
1774 .  d_nnz - array containing the number of block nonzeros in the various block rows
1775            in the upper triangular and diagonal part of the diagonal portion of the local
1776            submatrix (possibly different for each block row) or PETSC_NULL.
1777            You must leave room for the diagonal entry even if it is zero.
1778 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1779            submatrix (same for all local rows).
1780 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1781            off-diagonal portion of the local submatrix (possibly different for
1782            each block row) or PETSC_NULL.
1783 
1784    Output Parameter:
1785 .  A - the matrix
1786 
1787    Options Database Keys:
1788 +   -mat_no_unroll - uses code that does not unroll the loops in the
1789                      block calculations (much slower)
1790 .   -mat_block_size - size of the blocks to use
1791 -   -mat_mpi - use the parallel matrix data structures even on one processor
1792                (defaults to using SeqSBAIJ format on one processor)
1793 
1794    Notes:
1795    The number of rows and columns must be divisible by blocksize.
1796 
1797    The user MUST specify either the local or global matrix dimensions
1798    (possibly both).
1799 
1800    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1801    then it must be used on all processors that share the object for that argument.
1802 
1803    If the *_nnz parameter is given then the *_nz parameter is ignored.
1804 
1805    Storage Information:
1806    For a square global matrix we define each processor's diagonal portion
1807    to be its local rows and the corresponding columns (a square submatrix);
1808    each processor's off-diagonal portion encompasses the remainder of the
1809    local matrix (a rectangular submatrix).
1810 
1811    The user can specify preallocated storage for the diagonal part of
1812    the local submatrix with either d_nz or d_nnz (not both).  Set
1813    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1814    memory allocation.  Likewise, specify preallocated storage for the
1815    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1816 
1817    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1818    the figure below we depict these three local rows and all columns (0-11).
1819 
1820 .vb
1821            0 1 2 3 4 5 6 7 8 9 10 11
1822           -------------------
1823    row 3  |  o o o d d d o o o o o o
1824    row 4  |  o o o d d d o o o o o o
1825    row 5  |  o o o d d d o o o o o o
1826           -------------------
1827 .ve
1828 
1829    Thus, any entries in the d locations are stored in the d (diagonal)
1830    submatrix, and any entries in the o locations are stored in the
1831    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1832    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1833 
1834    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1835    plus the diagonal part of the d matrix,
1836    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1837    In general, for PDE problems in which most nonzeros are near the diagonal,
1838    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1839    or you will get TERRIBLE performance; see the users' manual chapter on
1840    matrices.
1841 
1842    Level: intermediate
1843 
1844 .keywords: matrix, block, aij, compressed row, sparse, parallel
1845 
1846 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1847 @*/
1848 
1849 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1850 {
1851   PetscErrorCode ierr;
1852   PetscMPIInt    size;
1853 
1854   PetscFunctionBegin;
1855   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1856   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1857   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1858   if (size > 1) {
1859     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1860     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1861   } else {
1862     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1863     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1864   }
1865   PetscFunctionReturn(0);
1866 }
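/*
   A minimal end-to-end sketch (hypothetical helper, illustrative sizes):
   creates a tridiagonal MPISBAIJ matrix with bs = 1 and 4 local rows, and
   inserts only the upper triangular entries (column >= row), as this
   format requires.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleAssembleMPISBAIJ"
static PetscErrorCode ExampleAssembleMPISBAIJ(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend,M,N,ncols,cols[2];
  PetscScalar    vals[2];

  PetscFunctionBegin;
  ierr = MatCreateMPISBAIJ(comm,1,4,4,PETSC_DETERMINE,PETSC_DETERMINE,
                           2,PETSC_NULL,1,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetSize(*A,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    cols[0] = i; vals[0] = 2.0; ncols = 1;                     /* diagonal entry */
    if (i+1 < N) {cols[1] = i+1; vals[1] = -1.0; ncols = 2;}   /* superdiagonal */
    ierr = MatSetValues(*A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}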
1867 
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1871 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1872 {
1873   Mat            mat;
1874   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1875   PetscErrorCode ierr;
1876   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1877   PetscScalar    *array;
1878 
1879   PetscFunctionBegin;
1880   *newmat       = 0;
1881   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1882   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1883   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1884   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1885 
1886   mat->factor       = matin->factor;
1887   mat->preallocated = PETSC_TRUE;
1888   mat->assembled    = PETSC_TRUE;
1889   mat->insertmode   = NOT_SET_VALUES;
1890 
1891   a = (Mat_MPISBAIJ*)mat->data;
1892   mat->bs  = matin->bs;
1893   a->bs2   = oldmat->bs2;
1894   a->mbs   = oldmat->mbs;
1895   a->nbs   = oldmat->nbs;
1896   a->Mbs   = oldmat->Mbs;
1897   a->Nbs   = oldmat->Nbs;
1898 
1899   a->rstart       = oldmat->rstart;
1900   a->rend         = oldmat->rend;
1901   a->cstart       = oldmat->cstart;
1902   a->cend         = oldmat->cend;
1903   a->size         = oldmat->size;
1904   a->rank         = oldmat->rank;
1905   a->donotstash   = oldmat->donotstash;
1906   a->roworiented  = oldmat->roworiented;
1907   a->rowindices   = 0;
1908   a->rowvalues    = 0;
1909   a->getrowactive = PETSC_FALSE;
1910   a->barray       = 0;
1911   a->rstart_bs    = oldmat->rstart_bs;
1912   a->rend_bs      = oldmat->rend_bs;
1913   a->cstart_bs    = oldmat->cstart_bs;
1914   a->cend_bs      = oldmat->cend_bs;
1915 
1916   /* hash table stuff */
1917   a->ht           = 0;
1918   a->hd           = 0;
1919   a->ht_size      = 0;
1920   a->ht_flag      = oldmat->ht_flag;
1921   a->ht_fact      = oldmat->ht_fact;
1922   a->ht_total_ct  = 0;
1923   a->ht_insert_ct = 0;
1924 
1925   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1926   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1927   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1928   if (oldmat->colmap) {
1929 #if defined (PETSC_USE_CTABLE)
1930     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1931 #else
1932     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1933     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1934     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1935 #endif
1936   } else a->colmap = 0;
1937 
1938   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1939     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1940     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1941     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
1942   } else a->garray = 0;
1943 
1944   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1945   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1946   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1947   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1948 
1949   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
1950   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1951   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
1952   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1953 
1954   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
1955   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
1956   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
1957   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
1958   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
1959   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
1960   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
1961   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
1962   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1963   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1964   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
1965   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
1966   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
1967 
1968   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; reference the old scatter instead: */
1969   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
1970   a->sMvctx = oldmat->sMvctx;
1971   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
1972 
1973   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1974   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1975   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1976   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1977   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1978   *newmat = mat;
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 #include "petscsys.h"
1983 
1984 #undef __FUNCT__
1985 #define __FUNCT__ "MatLoad_MPISBAIJ"
1986 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1987 {
1988   Mat            A;
1989   PetscErrorCode ierr;
1990   PetscInt       i,nz,j,rstart,rend;
1991   PetscScalar    *vals,*buf;
1992   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1993   MPI_Status     status;
1994   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
1995   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1996   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
1997   PetscInt       bs=1,Mbs,mbs,extra_rows;
1998   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
1999   PetscInt       dcount,kmax,k,nzcount,tmp;
2000   int            fd;
2001 
2002   PetscFunctionBegin;
2003   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2004 
2005   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2006   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2007   if (!rank) {
2008     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2009     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2010     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2011     if (header[3] < 0) {
2012       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2013     }
2014   }
2015 
2016   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2017   M = header[1]; N = header[2];
2018 
2019   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2020 
2021   /*
2022      This code adds extra rows to make sure the number of rows is
2023      divisible by the blocksize
2024   */
2025   Mbs        = M/bs;
2026   extra_rows = bs - M + bs*(Mbs);
2027   if (extra_rows == bs) extra_rows = 0;
2028   else                  Mbs++;
2029   if (extra_rows && !rank) {
2030     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2031   }
2032 
2033   /* determine ownership of all rows */
2034   mbs        = Mbs/size + ((Mbs % size) > rank);
2035   m          = mbs*bs;
2036   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2037   browners   = rowners + size + 1;
2038   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2039   rowners[0] = 0;
2040   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2041   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2042   rstart = rowners[rank];
2043   rend   = rowners[rank+1];
2044 
2045   /* distribute row lengths to all processors */
2046   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2047   if (!rank) {
2048     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2049     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2050     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2051     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2052     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2053     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2054     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2055   } else {
2056     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2057   }
2058 
2059   if (!rank) {   /* procs[0] */
2060     /* calculate the number of nonzeros on each processor */
2061     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2062     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2063     for (i=0; i<size; i++) {
2064       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2065         procsnz[i] += rowlengths[j];
2066       }
2067     }
2068     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2069 
2070     /* determine max buffer needed and allocate it */
2071     maxnz = 0;
2072     for (i=0; i<size; i++) {
2073       maxnz = PetscMax(maxnz,procsnz[i]);
2074     }
2075     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2076 
2077     /* read in my part of the matrix column indices  */
2078     nz     = procsnz[0];
2079     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2080     mycols = ibuf;
2081     if (size == 1)  nz -= extra_rows;
2082     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2083     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2084 
2085     /* read in each remaining processor's part (except the last) and ship it off */
2086     for (i=1; i<size-1; i++) {
2087       nz   = procsnz[i];
2088       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2089       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2090     }
2091     /* read in the stuff for the last proc */
2092     if (size != 1) {
2093       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2094       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2095       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2096       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2097     }
2098     ierr = PetscFree(cols);CHKERRQ(ierr);
2099   } else {  /* procs[i], i>0 */
2100     /* determine buffer space needed for message */
2101     nz = 0;
2102     for (i=0; i<m; i++) {
2103       nz += locrowlens[i];
2104     }
2105     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2106     mycols = ibuf;
2107     /* receive message of column indices */
2108     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2109     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2110     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2111   }
2112 
2113   /* loop over local rows, determining number of off diagonal entries */
2114   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2115   odlens   = dlens + (rend-rstart);
2116   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2117   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2118   masked1  = mask    + Mbs;
2119   masked2  = masked1 + Mbs;
2120   rowcount = 0; nzcount = 0;
2121   for (i=0; i<mbs; i++) {
2122     dcount  = 0;
2123     odcount = 0;
2124     for (j=0; j<bs; j++) {
2125       kmax = locrowlens[rowcount];
2126       for (k=0; k<kmax; k++) {
2127         tmp = mycols[nzcount++]/bs; /* block col. index */
2128         if (!mask[tmp]) {
2129           mask[tmp] = 1;
2130           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2131           else masked1[dcount++] = tmp; /* entry in diag portion */
2132         }
2133       }
2134       rowcount++;
2135     }
2136 
2137     dlens[i]  = dcount;  /* d_nzz[i] */
2138     odlens[i] = odcount; /* o_nzz[i] */
2139 
2140     /* zero out the mask elements we set */
2141     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2142     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2143   }
2144 
2145   /* create our matrix */
2146   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2147   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2148   ierr = MatSetType(A,type);CHKERRQ(ierr);
2149   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2150   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2151 
2152   if (!rank) {
2153     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2154     /* read in my part of the matrix numerical values  */
2155     nz = procsnz[0];
2156     vals = buf;
2157     mycols = ibuf;
2158     if (size == 1)  nz -= extra_rows;
2159     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2160     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2161 
2162     /* insert into matrix */
2163     jj      = rstart*bs;
2164     for (i=0; i<m; i++) {
2165       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2166       mycols += locrowlens[i];
2167       vals   += locrowlens[i];
2168       jj++;
2169     }
2170 
2171     /* read in other processors (except the last one) and ship out */
2172     for (i=1; i<size-1; i++) {
2173       nz   = procsnz[i];
2174       vals = buf;
2175       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2176       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2177     }
2178     /* the last proc */
2179     if (size != 1){
2180       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2181       vals = buf;
2182       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2183       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2184       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2185     }
2186     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2187 
2188   } else {
2189     /* receive numeric values */
2190     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2191 
2192     /* receive message of values */
2193     vals   = buf;
2194     mycols = ibuf;
2195     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2196     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2197     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2198 
2199     /* insert into matrix */
2200     jj      = rstart*bs;
2201     for (i=0; i<m; i++) {
2202       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2203       mycols += locrowlens[i];
2204       vals   += locrowlens[i];
2205       jj++;
2206     }
2207   }
2208 
2209   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2210   ierr = PetscFree(buf);CHKERRQ(ierr);
2211   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2212   ierr = PetscFree(rowners);CHKERRQ(ierr);
2213   ierr = PetscFree(dlens);CHKERRQ(ierr);
2214   ierr = PetscFree(mask);CHKERRQ(ierr);
2215   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2216   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2217   *newmat = A;
2218   PetscFunctionReturn(0);
2219 }
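/*
   A minimal loading sketch (hypothetical helper and file name): reads a
   binary matrix into the MPISBAIJ format; the block size comes from the
   -matload_block_size option queried at the top of MatLoad_MPISBAIJ().
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleLoadMPISBAIJ"
static PetscErrorCode ExampleLoadMPISBAIJ(MPI_Comm comm,const char file[],Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,file,FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPISBAIJ,A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}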
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2223 /*XXXXX@
2224    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2225 
2226    Input Parameters:
2227 .  mat  - the matrix
2228 .  fact - factor
2229 
2230    Collective on Mat
2231 
2232    Level: advanced
2233 
2234   Notes:
2235    This can also be set by the command line option: -mat_use_hash_table fact
2236 
2237 .keywords: matrix, hashtable, factor, HT
2238 
2239 .seealso: MatSetOption()
2240 @XXXXX*/
2241 
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2245 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2246 {
2247   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2248   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2249   PetscReal      atmp;
2250   PetscReal      *work,*svalues,*rvalues;
2251   PetscErrorCode ierr;
2252   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2253   PetscMPIInt    rank,size;
2254   PetscInt       *rowners_bs,dest,count,source;
2255   PetscScalar    *va;
2256   MatScalar      *ba;
2257   MPI_Status     stat;
2258 
2259   PetscFunctionBegin;
2260   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2261   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2262 
2263   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2264   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2265 
2266   bs   = A->bs;
2267   mbs  = a->mbs;
2268   Mbs  = a->Mbs;
2269   ba   = b->a;
2270   bi   = b->i;
2271   bj   = b->j;
2272 
2273   /* find ownerships */
2274   rowners_bs = a->rowners_bs;
2275 
2276   /* each proc creates an array to be distributed */
2277   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2278   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2279 
2280   /* row_max for B */
2281   if (rank != size-1){
2282     for (i=0; i<mbs; i++) {
2283       ncols = bi[1] - bi[0]; bi++;
2284       brow  = bs*i;
2285       for (j=0; j<ncols; j++){
2286         bcol = bs*(*bj);
2287         for (kcol=0; kcol<bs; kcol++){
2288           col = bcol + kcol;                 /* local col index */
2289           col += rowners_bs[rank+1];      /* global col index */
2290           for (krow=0; krow<bs; krow++){
2291             atmp = PetscAbsScalar(*ba); ba++;
2292             row = brow + krow;    /* local row index */
2293             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2294             if (work[col] < atmp) work[col] = atmp;
2295           }
2296         }
2297         bj++;
2298       }
2299     }
2300 
2301     /* send values to its owners */
2302     for (dest=rank+1; dest<size; dest++){
2303       svalues = work + rowners_bs[dest];
2304       count   = rowners_bs[dest+1]-rowners_bs[dest];
2305       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2306     }
2307   }
2308 
2309   /* receive values */
2310   if (rank){
2311     rvalues = work;
2312     count   = rowners_bs[rank+1]-rowners_bs[rank];
2313     for (source=0; source<rank; source++){
2314       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2315       /* process values */
2316       for (i=0; i<count; i++){
2317         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2318       }
2319     }
2320   }
2321 
2322   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2323   ierr = PetscFree(work);CHKERRQ(ierr);
2324   PetscFunctionReturn(0);
2325 }
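/*
   A minimal usage sketch (hypothetical helper): creates a compatible
   parallel vector and fills it with the per-row maxima computed by
   MatGetRowMax_MPISBAIJ() above (absolute values for the off-diagonal part).
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleRowMax"
static PetscErrorCode ExampleRowMax(Mat A,Vec *v)
{
  PetscErrorCode ierr;
  PetscInt       m,n;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,m,PETSC_DETERMINE,v);CHKERRQ(ierr);
  ierr = MatGetRowMax(A,*v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}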
2326 
2327 #undef __FUNCT__
2328 #define __FUNCT__ "MatRelax_MPISBAIJ"
2329 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2330 {
2331   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2332   PetscErrorCode ierr;
2333   PetscInt       mbs=mat->mbs,bs=matin->bs;
2334   PetscScalar    *x,*b,*ptr,zero=0.0;
2335   Vec            bb1;
2336 
2337   PetscFunctionBegin;
2338   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2339   if (bs > 1)
2340     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2341 
2342   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2343     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2344       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2345       its--;
2346     }
2347 
2348     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2349     while (its--){
2350 
2351       /* lower triangular part: slvec0b = - B^T*xx */
2352       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2353 
2354       /* copy xx into slvec0a */
2355       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2356       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2357       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2358       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2359 
2360       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2361 
2362       /* copy bb into slvec1a */
2363       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2364       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2365       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2366       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2367 
2368       /* set slvec1b = 0 */
2369       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2370 
2371       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2372       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2373       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2374       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2375 
2376       /* upper triangular part: bb1 = B*slvec1b + slvec1a */
2377       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2378 
2379       /* local diagonal sweep */
2380       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2381     }
2382     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2383   } else {
2384     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2385   }
2386   PetscFunctionReturn(0);
2387 }
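/*
   A minimal relaxation sketch (hypothetical helper, illustrative
   parameters): one local symmetric (SSOR) sweep with omega = 1.0 and a
   zero initial guess; this is the only MatSORType branch the routine
   above implements.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleSSORSweep"
static PetscErrorCode ExampleSSORSweep(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRelax(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP|SOR_ZERO_INITIAL_GUESS),
                  0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}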
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2391 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2392 {
2393   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2394   PetscErrorCode ierr;
2395   Vec            lvec1,bb1;
2396 
2397   PetscFunctionBegin;
2398   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2399   if (matin->bs > 1)
2400     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2401 
2402   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2403     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2404       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2405       its--;
2406     }
2407 
2408     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2409     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2410     while (its--){
2411       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2412 
2413       /* lower triangular part: bb1 = bb - B^T*xx */
2414       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2415       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2416 
2417       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2418       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2419       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2420 
2421       /* upper triangular part: bb1 = bb1 - B*x */
2422       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2423       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2424 
2425       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2426 
2427       /* diagonal sweep */
2428       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2429     }
2430     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2431     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2432   } else {
2433     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2434   }
2435   PetscFunctionReturn(0);
2436 }
2437 
2438