/* /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 22612f2f7cceb60caedd65384cdf99fc989f2aeb) */
#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist; the ..._MatScalar() names below are simply #defined to the regular routines.
   Otherwise ..._MatScalar() takes matrix elements already in single precision and
   inserts them into the single-precision data structures, while MatSetValuesBlocked_MPIBAIJ()
   first converts the entries to single precision and then calls ..._MatScalar() to store them.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif

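/*
   MatGetRowMaxAbs_MPIBAIJ - For each local row, returns the entry of maximum
   absolute value. The result is the elementwise max over the diagonal block A
   and the off-diagonal block B; column indices coming from B are mapped back
   to global numbering through garray.
*/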
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
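
/*
   Note on the colmap convention used throughout this file: stored values are
   shifted by one so that 0 can mean "this global block column is not present
   in the off-diagonal part". A lookup therefore subtracts 1 and treats a
   negative result as "not found"; a minimal sketch of the non-CTABLE case:

      col = baij->colmap[in[j]/bs] - 1;    first point column of the block, or negative if absent
      if (col < 0) ... block column absent; may require DisAssemble_MPIBAIJ() ...
      else col += in[j]%bs;                point column within the block
*/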

#define CHUNKSIZE  10

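/*
   MatSetValues_SeqBAIJ_A_Private() and ..._B_Private() insert one point value
   into the diagonal (A) or off-diagonal (B) SeqBAIJ block. The search for the
   block column is a binary search narrowed to a window of at most 3 entries,
   finished by a linear scan; if the block column is absent and new nonzeros
   are allowed, the later entries in the row are shifted up (reallocating the
   row storage if needed) and a zeroed bs*bs block is started at the slot.
*/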
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

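/*
   MatSetValues_MPIBAIJ_MatScalar - Point-wise insertion. Rows owned by this
   process go directly into the diagonal block A (local column numbering) or
   the off-diagonal block B (column translated through colmap); rows owned by
   other processes are stashed and communicated at assembly time.
*/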
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
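/*
   HASH implements Knuth-style multiplicative hashing: the key is multiplied
   by the golden-ratio conjugate (~0.618), and the fractional part of the
   product, scaled by the table size, gives a slot in [0,size). For example,
   with size = 100 and key = 7: 7*0.6180339887 = 4.3262..., fractional part
   0.3262..., so h1 = 32. Collisions are resolved by the linear probing in
   the MatSetValues_..._HT routines below.
*/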
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstartbs;
  PetscInt        rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

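/*
   MatNorm_MPIBAIJ - Computes
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2), accumulated locally over the A and
                     B blocks and combined with an MPI_SUM reduction;
     NORM_1:         max_j sum_i |a_ij| (max column sum), via per-column sums
                     reduced with MPI_SUM;
     NORM_INFINITY:  max_i sum_j |a_ij| (max row sum), via per-row sums
                     reduced with MPI_MAX.
*/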
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap.N;
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table and fills it in.
  This table is created only once. If new entries need
  to be added to the matrix then the hash table has to
  be destroyed and recreated.
*/
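/*
   The table uses open addressing with linear probing: key = row*Nbs+col+1
   (the +1 so that 0 can mean "empty slot"), and each slot stores alongside
   the key a pointer to the corresponding bs*bs block of values, so a later
   MatSetValues can write straight into the matrix storage.
*/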
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(0,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
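
/*
   Typical assembly sequence as seen from user code; a minimal sketch with
   placeholder indices i, j and value v:

      ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin() starts communicating the stashed off-process entries;
   MatAssemblyEnd() receives them and inserts them via the routines above.
*/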

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

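/*
   MatMult_MPIBAIJ - Computes y = A*x. The scatter of the needed off-process
   entries of x into a->lvec is started first; the multiply by the diagonal
   block overlaps that communication, and the off-diagonal block contribution
   is added once the scatter has completed.
*/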
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1379 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1380 {
1381   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1382   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1383   PetscErrorCode ierr;
1384   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1385   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1386   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1387 
1388   PetscFunctionBegin;
1389   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1390   mat->getrowactive = PETSC_TRUE;
1391 
1392   if (!mat->rowvalues && (idx || v)) {
1393     /*
1394         allocate enough space to hold information from the longest row.
1395     */
1396     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1397     PetscInt     max = 1,mbs = mat->mbs,tmp;
1398     for (i=0; i<mbs; i++) {
1399       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1400       if (max < tmp) { max = tmp; }
1401     }
1402     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1403     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1404   }
1405 
1406   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1407   lrow = row - brstart;
1408 
1409   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1410   if (!v)   {pvA = 0; pvB = 0;}
1411   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1412   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1413   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1414   nztot = nzA + nzB;
1415 
1416   cmap  = mat->garray;
1417   if (v  || idx) {
1418     if (nztot) {
1419       /* Sort by increasing column numbers, assuming A and B already sorted */
1420       PetscInt imark = -1;
1421       if (v) {
1422         *v = v_p = mat->rowvalues;
1423         for (i=0; i<nzB; i++) {
1424           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1425           else break;
1426         }
1427         imark = i;
1428         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1429         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1430       }
1431       if (idx) {
1432         *idx = idx_p = mat->rowindices;
1433         if (imark > -1) {
1434           for (i=0; i<imark; i++) {
1435             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1436           }
1437         } else {
1438           for (i=0; i<nzB; i++) {
1439             if (cmap[cworkB[i]/bs] < cstart)
1440               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1441             else break;
1442           }
1443           imark = i;
1444         }
1445         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1446         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1447       }
1448     } else {
1449       if (idx) *idx = 0;
1450       if (v)   *v   = 0;
1451     }
1452   }
1453   *nz = nztot;
1454   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1455   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1461 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1462 {
1463   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1464 
1465   PetscFunctionBegin;
1466   if (!baij->getrowactive) {
1467     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1468   }
1469   baij->getrowactive = PETSC_FALSE;
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1475 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1476 {
1477   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1478   PetscErrorCode ierr;
1479 
1480   PetscFunctionBegin;
1481   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1482   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
1485 
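/*
   Usage sketch (illustrative): query the globally summed storage statistics
   of an assembled matrix A.

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
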
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1488 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1489 {
1490   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1491   Mat            A = a->A,B = a->B;
1492   PetscErrorCode ierr;
1493   PetscReal      isend[5],irecv[5];
1494 
1495   PetscFunctionBegin;
1496   info->block_size     = (PetscReal)matin->rmap.bs;
1497   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1498   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1499   isend[3] = info->memory;  isend[4] = info->mallocs;
1500   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1501   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1502   isend[3] += info->memory;  isend[4] += info->mallocs;
1503   if (flag == MAT_LOCAL) {
1504     info->nz_used      = isend[0];
1505     info->nz_allocated = isend[1];
1506     info->nz_unneeded  = isend[2];
1507     info->memory       = isend[3];
1508     info->mallocs      = isend[4];
1509   } else if (flag == MAT_GLOBAL_MAX) {
1510     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1511     info->nz_used      = irecv[0];
1512     info->nz_allocated = irecv[1];
1513     info->nz_unneeded  = irecv[2];
1514     info->memory       = irecv[3];
1515     info->mallocs      = irecv[4];
1516   } else if (flag == MAT_GLOBAL_SUM) {
1517     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1518     info->nz_used      = irecv[0];
1519     info->nz_allocated = irecv[1];
1520     info->nz_unneeded  = irecv[2];
1521     info->memory       = irecv[3];
1522     info->mallocs      = irecv[4];
1523   } else {
1524     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1525   }
1526   info->rows_global       = (PetscReal)matin->rmap.N;
1527   info->columns_global    = (PetscReal)matin->cmap.N;
1528   info->rows_local        = (PetscReal)matin->rmap.n;
1529   info->columns_local     = (PetscReal)matin->cmap.N; /* each local row spans all columns */
1530   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1531   info->fill_ratio_needed = 0;
1532   info->factor_mallocs    = 0;
1533   PetscFunctionReturn(0);
1534 }
1535 
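/*
   Usage sketch (illustrative): have the matrix silently drop entries destined
   for other processes instead of stashing and communicating them, and enable
   the hash table based assembly path.

      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
*/
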
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1538 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1539 {
1540   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   switch (op) {
1545   case MAT_NEW_NONZERO_LOCATIONS:
1546   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1547   case MAT_KEEP_ZEROED_ROWS:
1548   case MAT_NEW_NONZERO_LOCATION_ERR:
1549     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1550     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1551     break;
1552   case MAT_ROW_ORIENTED:
1553     a->roworiented = flg;
1554     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1555     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1556     break;
1557   case MAT_NEW_DIAGONALS:
1558     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1559     break;
1560   case MAT_IGNORE_OFF_PROC_ENTRIES:
1561     a->donotstash = flg;
1562     break;
1563   case MAT_USE_HASH_TABLE:
1564     a->ht_flag = flg;
1565     break;
1566   case MAT_SYMMETRIC:
1567   case MAT_STRUCTURALLY_SYMMETRIC:
1568   case MAT_HERMITIAN:
1569   case MAT_SYMMETRY_ETERNAL:
1570     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1571     break;
1572   default:
1573     SETERRQ1(PETSC_ERR_SUP,"Unknown option %d",op);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
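/*
   Usage sketch (illustrative): form the transpose out-of-place, or in place
   (in-place transposition is supported only for square matrices):

      Mat At;
      ierr = MatTranspose(A,&At);CHKERRQ(ierr);           out-of-place: At = A^T
      ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);    in-place:     A  = A^T
*/
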
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1580 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1581 {
1582   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1583   Mat_SeqBAIJ    *Aloc;
1584   Mat            B;
1585   PetscErrorCode ierr;
1586   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1587   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1588   MatScalar      *a;
1589 
1590   PetscFunctionBegin;
1591   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1592   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1593   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1594   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1595   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1596 
1597   /* copy over the A part */
1598   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1599   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1600   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1601 
1602   for (i=0; i<mbs; i++) {
1603     rvals[0] = bs*(baij->rstartbs + i);
1604     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1605     for (j=ai[i]; j<ai[i+1]; j++) {
1606       col = (baij->cstartbs+aj[j])*bs;
1607       for (k=0; k<bs; k++) {
1608         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1609         col++; a += bs;
1610       }
1611     }
1612   }
1613   /* copy over the B part */
1614   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1615   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1616   for (i=0; i<mbs; i++) {
1617     rvals[0] = bs*(baij->rstartbs + i);
1618     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1619     for (j=ai[i]; j<ai[i+1]; j++) {
1620       col = baij->garray[aj[j]]*bs;
1621       for (k=0; k<bs; k++) {
1622         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1623         col++; a += bs;
1624       }
1625     }
1626   }
1627   ierr = PetscFree(rvals);CHKERRQ(ierr);
1628   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1629   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1630 
1631   if (matout) {
1632     *matout = B;
1633   } else {
1634     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1635   }
1636   PetscFunctionReturn(0);
1637 }
1638 
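/*
   Usage sketch (illustrative): scale the rows of A by a vector l and the
   columns by a vector r, i.e. A <- diag(l) A diag(r); either vector may be
   PETSC_NULL to skip that side.

      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/
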
1639 #undef __FUNCT__
1640 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1641 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1642 {
1643   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1644   Mat            a = baij->A,b = baij->B;
1645   PetscErrorCode ierr;
1646   PetscInt       s1,s2,s3;
1647 
1648   PetscFunctionBegin;
1649   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1650   if (rr) {
1651     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1652     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1653     /* Overlap communication with computation. */
1654     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1655   }
1656   if (ll) {
1657     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1658     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1659     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1660   }
1661   /* scale the diagonal block */
1662   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1663 
1664   if (rr) {
1665     /* Do a scatter end and then right scale the off-diagonal block */
1666     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1668   }
1669 
1670   PetscFunctionReturn(0);
1671 }
1672 
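/*
   Usage sketch (illustrative): zero two global rows of A and place 1.0 on
   their diagonal entries; any process may list any rows, since the row
   numbers are communicated to their owners as done below.

      PetscInt rows[2];
      rows[0] = 0; rows[1] = 5;
      ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);
*/
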
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1675 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1676 {
1677   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1678   PetscErrorCode ierr;
1679   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1680   PetscInt       i,*owners = A->rmap.range;
1681   PetscInt       *nprocs,j,idx,nsends,row;
1682   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1683   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1684   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1685   MPI_Comm       comm = A->comm;
1686   MPI_Request    *send_waits,*recv_waits;
1687   MPI_Status     recv_status,*send_status;
1688 #if defined(PETSC_DEBUG)
1689   PetscTruth     found = PETSC_FALSE;
1690 #endif
1691 
1692   PetscFunctionBegin;
1693   /*  first count number of contributors to each processor */
1694   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1695   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1696   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
1697   j = 0;
1698   for (i=0; i<N; i++) {
1699     if (lastidx > (idx = rows[i])) j = 0;
1700     lastidx = idx;
1701     for (; j<size; j++) {
1702       if (idx >= owners[j] && idx < owners[j+1]) {
1703         nprocs[2*j]++;
1704         nprocs[2*j+1] = 1;
1705         owner[i] = j;
1706 #if defined(PETSC_DEBUG)
1707         found = PETSC_TRUE;
1708 #endif
1709         break;
1710       }
1711     }
1712 #if defined(PETSC_DEBUG)
1713     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1714     found = PETSC_FALSE;
1715 #endif
1716   }
1717   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1718 
1719   /* inform other processors of number of messages and max length*/
1720   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1721 
1722   /* post receives:   */
1723   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1724   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1725   for (i=0; i<nrecvs; i++) {
1726     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1727   }
1728 
1729   /* do sends:
1730      1) starts[i] gives the starting index in svalues for stuff going to
1731      the ith processor
1732   */
1733   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1734   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1735   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1736   starts[0]  = 0;
1737   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1738   for (i=0; i<N; i++) {
1739     svalues[starts[owner[i]]++] = rows[i];
1740   }
1741 
1742   starts[0] = 0;
1743   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1744   count = 0;
1745   for (i=0; i<size; i++) {
1746     if (nprocs[2*i+1]) {
1747       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1748     }
1749   }
1750   ierr = PetscFree(starts);CHKERRQ(ierr);
1751 
1752   base = owners[rank];
1753 
1754   /*  wait on receives */
1755   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1756   source = lens + nrecvs;
1757   count  = nrecvs; slen = 0;
1758   while (count) {
1759     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1760     /* unpack receives into our local space */
1761     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1762     source[imdex]  = recv_status.MPI_SOURCE;
1763     lens[imdex]    = n;
1764     slen          += n;
1765     count--;
1766   }
1767   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1768 
1769   /* move the data into the send scatter */
1770   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1771   count = 0;
1772   for (i=0; i<nrecvs; i++) {
1773     values = rvalues + i*nmax;
1774     for (j=0; j<lens[i]; j++) {
1775       lrows[count++] = values[j] - base;
1776     }
1777   }
1778   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1779   ierr = PetscFree(lens);CHKERRQ(ierr);
1780   ierr = PetscFree(owner);CHKERRQ(ierr);
1781   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1782 
1783   /* actually zap the local rows */
1784   /*
1785         Zero the required rows. If the "diagonal block" of the matrix
1786      is square and the user wishes to set the diagonal we use separate
1787      code so that MatSetValues() is not called for each diagonal allocating
1788      new memory, thus calling lots of mallocs and slowing things down.
1789 
1790        Contributed by: Matthew Knepley
1791   */
1792   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1793   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1794   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1795     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1796   } else if (diag != 0.0) {
1797     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1798     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1799       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1800 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1801     }
1802     for (i=0; i<slen; i++) {
1803       row  = lrows[i] + rstart_bs;
1804       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1805     }
1806     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1807     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1808   } else {
1809     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1810   }
1811 
1812   ierr = PetscFree(lrows);CHKERRQ(ierr);
1813 
1814   /* wait on sends */
1815   if (nsends) {
1816     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1817     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1818     ierr = PetscFree(send_status);CHKERRQ(ierr);
1819   }
1820   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1821   ierr = PetscFree(svalues);CHKERRQ(ierr);
1822 
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1828 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1829 {
1830   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1831   PetscErrorCode ierr;
1832 
1833   PetscFunctionBegin;
1834   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatEqual_MPIBAIJ"
1842 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1843 {
1844   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1845   Mat            a,b,c,d;
1846   PetscTruth     flg;
1847   PetscErrorCode ierr;
1848 
1849   PetscFunctionBegin;
1850   a = matA->A; b = matA->B;
1851   c = matB->A; d = matB->B;
1852 
1853   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1854   if (flg) {
1855     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1856   }
1857   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatCopy_MPIBAIJ"
1863 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1864 {
1865   PetscErrorCode ierr;
1866   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1867   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1868 
1869   PetscFunctionBegin;
1870   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1871   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1872     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1873   } else {
1874     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1875     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1876   }
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1882 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1883 {
1884   PetscErrorCode ierr;
1885 
1886   PetscFunctionBegin;
1887   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 #include "petscblaslapack.h"
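/*
   Usage sketch (illustrative): compute Y = Y + 2.0*X; when X and Y were
   assembled with identical nonzero structure, passing SAME_NONZERO_PATTERN
   selects the fast BLAS axpy path below.

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/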
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1894 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1895 {
1896   PetscErrorCode ierr;
1897   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1898   PetscBLASInt   bnz,one=1;
1899   Mat_SeqBAIJ    *x,*y;
1900 
1901   PetscFunctionBegin;
1902   if (str == SAME_NONZERO_PATTERN) {
1903     PetscScalar alpha = a;
1904     x = (Mat_SeqBAIJ *)xx->A->data;
1905     y = (Mat_SeqBAIJ *)yy->A->data;
1906     bnz = (PetscBLASInt)x->nz;
1907     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1908     x = (Mat_SeqBAIJ *)xx->B->data;
1909     y = (Mat_SeqBAIJ *)yy->B->data;
1910     bnz = (PetscBLASInt)x->nz;
1911     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1912   } else {
1913     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1914   }
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1920 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1921 {
1922   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1923   PetscErrorCode ierr;
1924 
1925   PetscFunctionBegin;
1926   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1927   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1933 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1934 {
1935   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1936   PetscErrorCode ierr;
1937 
1938   PetscFunctionBegin;
1939   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1940   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 /* -------------------------------------------------------------------*/
1945 static struct _MatOps MatOps_Values = {
1946        MatSetValues_MPIBAIJ,
1947        MatGetRow_MPIBAIJ,
1948        MatRestoreRow_MPIBAIJ,
1949        MatMult_MPIBAIJ,
1950 /* 4*/ MatMultAdd_MPIBAIJ,
1951        MatMultTranspose_MPIBAIJ,
1952        MatMultTransposeAdd_MPIBAIJ,
1953        0,
1954        0,
1955        0,
1956 /*10*/ 0,
1957        0,
1958        0,
1959        0,
1960        MatTranspose_MPIBAIJ,
1961 /*15*/ MatGetInfo_MPIBAIJ,
1962        MatEqual_MPIBAIJ,
1963        MatGetDiagonal_MPIBAIJ,
1964        MatDiagonalScale_MPIBAIJ,
1965        MatNorm_MPIBAIJ,
1966 /*20*/ MatAssemblyBegin_MPIBAIJ,
1967        MatAssemblyEnd_MPIBAIJ,
1968        0,
1969        MatSetOption_MPIBAIJ,
1970        MatZeroEntries_MPIBAIJ,
1971 /*25*/ MatZeroRows_MPIBAIJ,
1972        0,
1973        0,
1974        0,
1975        0,
1976 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1977        0,
1978        0,
1979        0,
1980        0,
1981 /*35*/ MatDuplicate_MPIBAIJ,
1982        0,
1983        0,
1984        0,
1985        0,
1986 /*40*/ MatAXPY_MPIBAIJ,
1987        MatGetSubMatrices_MPIBAIJ,
1988        MatIncreaseOverlap_MPIBAIJ,
1989        MatGetValues_MPIBAIJ,
1990        MatCopy_MPIBAIJ,
1991 /*45*/ 0,
1992        MatScale_MPIBAIJ,
1993        0,
1994        0,
1995        0,
1996 /*50*/ 0,
1997        0,
1998        0,
1999        0,
2000        0,
2001 /*55*/ 0,
2002        0,
2003        MatSetUnfactored_MPIBAIJ,
2004        0,
2005        MatSetValuesBlocked_MPIBAIJ,
2006 /*60*/ 0,
2007        MatDestroy_MPIBAIJ,
2008        MatView_MPIBAIJ,
2009        0,
2010        0,
2011 /*65*/ 0,
2012        0,
2013        0,
2014        0,
2015        0,
2016 /*70*/ MatGetRowMaxAbs_MPIBAIJ,
2017        0,
2018        0,
2019        0,
2020        0,
2021 /*75*/ 0,
2022        0,
2023        0,
2024        0,
2025        0,
2026 /*80*/ 0,
2027        0,
2028        0,
2029        0,
2030        MatLoad_MPIBAIJ,
2031 /*85*/ 0,
2032        0,
2033        0,
2034        0,
2035        0,
2036 /*90*/ 0,
2037        0,
2038        0,
2039        0,
2040        0,
2041 /*95*/ 0,
2042        0,
2043        0,
2044        0,
2045        0,
2046 /*100*/0,
2047        0,
2048        0,
2049        0,
2050        0,
2051 /*105*/0,
2052        MatRealPart_MPIBAIJ,
2053        MatImaginaryPart_MPIBAIJ};
2054 
2055 
2056 EXTERN_C_BEGIN
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2059 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2060 {
2061   PetscFunctionBegin;
2062   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2063   *iscopy = PETSC_FALSE;
2064   PetscFunctionReturn(0);
2065 }
2066 EXTERN_C_END
2067 
2068 EXTERN_C_BEGIN
2069 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2070 EXTERN_C_END
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2074 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2075 {
2076   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
2077   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2078   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2079   const PetscInt *JJ;
2080   PetscScalar    *values;
2081   PetscErrorCode ierr;
2082 
2083   PetscFunctionBegin;
2084 #if defined(PETSC_OPT_g)
2085   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2086 #endif
2087   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2088   o_nnz = d_nnz + m;
2089 
2090   for (i=0; i<m; i++) {
2091     nnz     = Ii[i+1]- Ii[i];
2092     JJ      = J + Ii[i];
2093     nnz_max = PetscMax(nnz_max,nnz);
2094 #if defined(PETSC_OPT_g)
2095     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2096 #endif
2097     for (j=0; j<nnz; j++) {
2098       if (*JJ >= cstart) break;
2099       JJ++;
2100     }
2101     d = 0;
2102     for (; j<nnz; j++) {
2103       if (*JJ++ >= cend) break;
2104       d++;
2105     }
2106     d_nnz[i] = d;
2107     o_nnz[i] = nnz - d;
2108   }
2109   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2110   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2111 
2112   if (v) values = (PetscScalar*)v;
2113   else {
2114     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2115     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2116   }
2117 
2118   for (i=0; i<m; i++) {
2119     ii   = i + rstart;
2120     nnz  = Ii[i+1]- Ii[i];
2121     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? bs*bs*Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); /* each block entry holds bs*bs scalars */
2122   }
2123   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2124   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2125 
2126   if (!v) {
2127     ierr = PetscFree(values);CHKERRQ(ierr);
2128   }
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #undef __FUNCT__
2133 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2134 /*@C
2135    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format
2136    (block compressed row), using a CSR-style description of the local block rows.
2137 
2138    Collective on Mat
2139 
2140    Input Parameters:
2141 +  B - the matrix
     .  bs - the block size
2142 .  i - the indices into j for the start of each local block row (starts with zero)
2143 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each block row
2144 -  v - optional values in the matrix, stored as bs*bs scalars per block
2145 
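   Example (an illustrative sketch for bs=1 and two local block rows with
   entries in global block columns {0,1} and {1,2}; for bs>1 the indices are
   block indices and v holds bs*bs values per block):
.vb
      PetscInt    i[] = {0,2,4};
      PetscInt    j[] = {0,1,1,2};
      PetscScalar v[] = {1.0,2.0,3.0,4.0};
      ierr = MatMPIBAIJSetPreallocationCSR(B,1,i,j,v);CHKERRQ(ierr);
.ve
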
2146    Level: developer
2147 
2148 .keywords: matrix, block, aij, compressed row, sparse, parallel
2149 
2150 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2151 @*/
2152 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2153 {
2154   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2155 
2156   PetscFunctionBegin;
2157   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2158   if (f) {
2159     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2160   }
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 EXTERN_C_BEGIN
2165 #undef __FUNCT__
2166 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2167 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2168 {
2169   Mat_MPIBAIJ    *b;
2170   PetscErrorCode ierr;
2171   PetscInt       i;
2172 
2173   PetscFunctionBegin;
2174   B->preallocated = PETSC_TRUE;
2175   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2176     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2177   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2178 
2179   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2180   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2181   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2182   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2183   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2184 
2185   B->rmap.bs  = bs;
2186   B->cmap.bs  = bs;
2187   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2188   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2189 
2190   if (d_nnz) {
2191     for (i=0; i<B->rmap.n/bs; i++) {
2192       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2193     }
2194   }
2195   if (o_nnz) {
2196     for (i=0; i<B->rmap.n/bs; i++) {
2197       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2198     }
2199   }
2200 
2201   b = (Mat_MPIBAIJ*)B->data;
2202   b->bs2 = bs*bs;
2203   b->mbs = B->rmap.n/bs;
2204   b->nbs = B->cmap.n/bs;
2205   b->Mbs = B->rmap.N/bs;
2206   b->Nbs = B->cmap.N/bs;
2207 
2208   for (i=0; i<=b->size; i++) {
2209     b->rangebs[i] = B->rmap.range[i]/bs;
2210   }
2211   b->rstartbs = B->rmap.rstart/bs;
2212   b->rendbs   = B->rmap.rend/bs;
2213   b->cstartbs = B->cmap.rstart/bs;
2214   b->cendbs   = B->cmap.rend/bs;
2215 
2216   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2217   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2218   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2219   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2220   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2221   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2222   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2223   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2224   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2225   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2226 
2227   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2228 
2229   PetscFunctionReturn(0);
2230 }
2231 EXTERN_C_END
2232 
2233 EXTERN_C_BEGIN
2234 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2235 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2236 EXTERN_C_END
2237 
2238 /*MC
2239    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2240 
2241    Options Database Keys:
2242 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2243 . -mat_block_size <bs> - set the blocksize used to store the matrix
2244 - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; fact sets the hash table size factor
2245 
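   Example (an illustrative sketch of typical use):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
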
2246   Level: beginner
2247 
2248 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2249 M*/
2250 
2251 EXTERN_C_BEGIN
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatCreate_MPIBAIJ"
2254 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2255 {
2256   Mat_MPIBAIJ    *b;
2257   PetscErrorCode ierr;
2258   PetscTruth     flg;
2259 
2260   PetscFunctionBegin;
2261   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2262   B->data = (void*)b;
2263 
2264 
2265   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2266   B->mapping    = 0;
2267   B->factor     = 0;
2268   B->assembled  = PETSC_FALSE;
2269 
2270   B->insertmode = NOT_SET_VALUES;
2271   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2272   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2273 
2274   /* build local table of row and column ownerships */
2275   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2276 
2277   /* build cache for off array entries formed */
2278   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2279   b->donotstash  = PETSC_FALSE;
2280   b->colmap      = PETSC_NULL;
2281   b->garray      = PETSC_NULL;
2282   b->roworiented = PETSC_TRUE;
2283 
2284 #if defined(PETSC_USE_MAT_SINGLE)
2285   /* stuff for MatSetValues_XXX in single precision */
2286   b->setvalueslen     = 0;
2287   b->setvaluescopy    = PETSC_NULL;
2288 #endif
2289 
2290   /* stuff used in block assembly */
2291   b->barray       = 0;
2292 
2293   /* stuff used for matrix vector multiply */
2294   b->lvec         = 0;
2295   b->Mvctx        = 0;
2296 
2297   /* stuff for MatGetRow() */
2298   b->rowindices   = 0;
2299   b->rowvalues    = 0;
2300   b->getrowactive = PETSC_FALSE;
2301 
2302   /* hash table stuff */
2303   b->ht           = 0;
2304   b->hd           = 0;
2305   b->ht_size      = 0;
2306   b->ht_flag      = PETSC_FALSE;
2307   b->ht_fact      = 0;
2308   b->ht_total_ct  = 0;
2309   b->ht_insert_ct = 0;
2310 
2311   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2312     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2313     if (flg) {
2314       PetscReal fact = 1.39;
2315       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2316       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2317       if (fact <= 1.0) fact = 1.39;
2318       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2319       ierr = PetscInfo1(0,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr);
2320     }
2321   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2322 
2323   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2324                                      "MatStoreValues_MPIBAIJ",
2325                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2326   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2327                                      "MatRetrieveValues_MPIBAIJ",
2328                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2329   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2330                                      "MatGetDiagonalBlock_MPIBAIJ",
2331                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2332   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2333                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2334                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2335   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2336 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2337 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2338   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2339                                      "MatDiagonalScaleLocal_MPIBAIJ",
2340                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2341   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2342                                      "MatSetHashTableFactor_MPIBAIJ",
2343                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2344   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2345   PetscFunctionReturn(0);
2346 }
2347 EXTERN_C_END
2348 
2349 /*MC
2350    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2351 
2352    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2353    and MATMPIBAIJ otherwise.
2354 
2355    Options Database Keys:
2356 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2357 
2358   Level: beginner
2359 
2360 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2361 M*/
2362 
2363 EXTERN_C_BEGIN
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatCreate_BAIJ"
2366 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2367 {
2368   PetscErrorCode ierr;
2369   PetscMPIInt    size;
2370 
2371   PetscFunctionBegin;
2372   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2373   if (size == 1) {
2374     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2375   } else {
2376     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2377   }
2378   PetscFunctionReturn(0);
2379 }
2380 EXTERN_C_END
2381 
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2384 /*@C
2385    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2386    (block compressed row).  For good matrix assembly performance
2387    the user should preallocate the matrix storage by setting the parameters
2388    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2389    performance can be increased by more than a factor of 50.
2390 
2391    Collective on Mat
2392 
2393    Input Parameters:
2394 +  A - the matrix
2395 .  bs   - size of block
2396 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2397            submatrix  (same for all local rows)
2398 .  d_nnz - array containing the number of block nonzeros in the various block rows
2399            in the diagonal portion of the local submatrix (possibly different for each block
2400            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2401 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2402            submatrix (same for all local rows).
2403 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2404            off-diagonal portion of the local submatrix (possibly different for
2405            each block row) or PETSC_NULL.
2406 
2407    If the *_nnz parameter is given then the *_nz parameter is ignored
2408 
2409    Options Database Keys:
2410 +   -mat_block_size - size of the blocks to use
2411 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; fact sets the hash table size factor
2412 
2413    Notes:
2414    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2415    then it must be used on all processors that share the object for that argument.
2416 
2417    Storage Information:
2418    For a square global matrix we define each processor's diagonal portion
2419    to be its local rows and the corresponding columns (a square submatrix);
2420    each processor's off-diagonal portion encompasses the remainder of the
2421    local matrix (a rectangular submatrix).
2422 
2423    The user can specify preallocated storage for the diagonal part of
2424    the local submatrix with either d_nz or d_nnz (not both).  Set
2425    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2426    memory allocation.  Likewise, specify preallocated storage for the
2427    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2428 
2429    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2430    the figure below we depict these three local rows and all columns (0-11).
2431 
2432 .vb
2433            0 1 2 3 4 5 6 7 8 9 10 11
2434           -------------------
2435    row 3  |  o o o d d d o o o o o o
2436    row 4  |  o o o d d d o o o o o o
2437    row 5  |  o o o d d d o o o o o o
2438           -------------------
2439 .ve
2440 
2441    Thus, any entries in the d locations are stored in the d (diagonal)
2442    submatrix, and any entries in the o locations are stored in the
2443    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2444    stored simply in the MATSEQBAIJ format for compressed row storage.
2445 
2446    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2447    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2448    In general, for PDE problems in which most nonzeros are near the diagonal,
2449    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2450    or you will get TERRIBLE performance; see the users' manual chapter on
2451    matrices.
2452 
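   Example (an illustrative sketch): preallocate for block size 2, expecting
   at most 3 blocks per block row in the diagonal portion and 1 block per
   block row in the off-diagonal portion:
.vb
      ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve
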
2453    Level: intermediate
2454 
2455 .keywords: matrix, block, aij, compressed row, sparse, parallel
2456 
2457 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2458 @*/
2459 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2460 {
2461   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2462 
2463   PetscFunctionBegin;
2464   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2465   if (f) {
2466     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2467   }
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatCreateMPIBAIJ"
2473 /*@C
2474    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2475    (block compressed row).  For good matrix assembly performance
2476    the user should preallocate the matrix storage by setting the parameters
2477    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2478    performance can be increased by more than a factor of 50.
2479 
2480    Collective on MPI_Comm
2481 
2482    Input Parameters:
2483 +  comm - MPI communicator
2484 .  bs   - size of block
2485 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2486            This value should be the same as the local size used in creating the
2487            y vector for the matrix-vector product y = Ax.
2488 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2489            This value should be the same as the local size used in creating the
2490            x vector for the matrix-vector product y = Ax.
2491 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2492 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2493 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2494            submatrix  (same for all local rows)
2495 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2496            in the diagonal portion of the local submatrix (possibly different for each block
2497            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2498 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2499            submatrix (same for all local rows).
2500 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2501            off-diagonal portion of the local submatrix (possibly different for
2502            each block row) or PETSC_NULL.
2503 
2504    Output Parameter:
2505 .  A - the matrix
2506 
2507    Options Database Keys:
2508 +   -mat_block_size - size of the blocks to use
2509 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; fact sets the hash table size factor
2510 
2511    Notes:
2512    If the *_nnz parameter is given then the *_nz parameter is ignored
2513 
2514    A nonzero block is any block that has 1 or more nonzeros in it
2515 
2516    The user MUST specify either the local or global matrix dimensions
2517    (possibly both).
2518 
2519    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2520    then it must be used on all processors that share the object for that argument.
2521 
2522    Storage Information:
2523    For a square global matrix we define each processor's diagonal portion
2524    to be its local rows and the corresponding columns (a square submatrix);
2525    each processor's off-diagonal portion encompasses the remainder of the
2526    local matrix (a rectangular submatrix).
2527 
2528    The user can specify preallocated storage for the diagonal part of
2529    the local submatrix with either d_nz or d_nnz (not both).  Set
2530    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2531    memory allocation.  Likewise, specify preallocated storage for the
2532    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2533 
2534    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2535    the figure below we depict these three local rows and all columns (0-11).
2536 
2537 .vb
2538            0 1 2 3 4 5 6 7 8 9 10 11
2539           -------------------
2540    row 3  |  o o o d d d o o o o o o
2541    row 4  |  o o o d d d o o o o o o
2542    row 5  |  o o o d d d o o o o o o
2543           -------------------
2544 .ve
2545 
2546    Thus, any entries in the d locations are stored in the d (diagonal)
2547    submatrix, and any entries in the o locations are stored in the
2548    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2549    stored simply in the MATSEQBAIJ format for compressed row storage.
2550 
2551    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2552    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2553    In general, for PDE problems in which most nonzeros are near the diagonal,
2554    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2555    or you will get TERRIBLE performance; see the users' manual chapter on
2556    matrices.
2557 
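   Example (an illustrative sketch): create a matrix with block size 2 and
   global size M by N, letting PETSc choose the local decomposition:
.vb
      Mat A;
      ierr = MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
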
2558    Level: intermediate
2559 
2560 .keywords: matrix, block, aij, compressed row, sparse, parallel
2561 
2562 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2563 @*/
2564 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2565 {
2566   PetscErrorCode ierr;
2567   PetscMPIInt    size;
2568 
2569   PetscFunctionBegin;
2570   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2571   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2572   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2573   if (size > 1) {
2574     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2575     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2576   } else {
2577     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2578     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2579   }
2580   PetscFunctionReturn(0);
2581 }
2582 
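/*
   Usage sketch (illustrative): create an independent copy of A, including its
   numerical values (use MAT_DO_NOT_COPY_VALUES to duplicate only the nonzero
   structure):

      Mat B;
      ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/
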
2583 #undef __FUNCT__
2584 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2585 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2586 {
2587   Mat            mat;
2588   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2589   PetscErrorCode ierr;
2590   PetscInt       len=0;
2591 
2592   PetscFunctionBegin;
2593   *newmat       = 0;
2594   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2595   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2596   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2597   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2598 
2599   mat->factor       = matin->factor;
2600   mat->preallocated = PETSC_TRUE;
2601   mat->assembled    = PETSC_TRUE;
2602   mat->insertmode   = NOT_SET_VALUES;
2603 
2604   a      = (Mat_MPIBAIJ*)mat->data;
2605   mat->rmap.bs  = matin->rmap.bs;
2606   a->bs2   = oldmat->bs2;
2607   a->mbs   = oldmat->mbs;
2608   a->nbs   = oldmat->nbs;
2609   a->Mbs   = oldmat->Mbs;
2610   a->Nbs   = oldmat->Nbs;
2611 
2612   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2613   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2614 
2615   a->size         = oldmat->size;
2616   a->rank         = oldmat->rank;
2617   a->donotstash   = oldmat->donotstash;
2618   a->roworiented  = oldmat->roworiented;
2619   a->rowindices   = 0;
2620   a->rowvalues    = 0;
2621   a->getrowactive = PETSC_FALSE;
2622   a->barray       = 0;
2623   a->rstartbs     = oldmat->rstartbs;
2624   a->rendbs       = oldmat->rendbs;
2625   a->cstartbs     = oldmat->cstartbs;
2626   a->cendbs       = oldmat->cendbs;
2627 
2628   /* hash table stuff */
2629   a->ht           = 0;
2630   a->hd           = 0;
2631   a->ht_size      = 0;
2632   a->ht_flag      = oldmat->ht_flag;
2633   a->ht_fact      = oldmat->ht_fact;
2634   a->ht_total_ct  = 0;
2635   a->ht_insert_ct = 0;
2636 
2637   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2638   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2639   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2640   if (oldmat->colmap) {
2641 #if defined (PETSC_USE_CTABLE)
2642   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2643 #else
2644   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2645   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2646   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2647 #endif
2648   } else a->colmap = 0;
2649 
2650   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2651     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2652     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2653     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2654   } else a->garray = 0;
2655 
2656   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2657   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2658   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2659   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2660 
2661   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2662   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2663   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2664   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2665   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2666   *newmat = mat;
2667 
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 #include "petscsys.h"
2672 
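/*
   Usage sketch (illustrative; "matrix.dat" is a hypothetical PETSc binary file):
   load a matrix as MPIBAIJ; the block size may be selected with the
   -matload_block_size option handled below.

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIBAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/
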
2673 #undef __FUNCT__
2674 #define __FUNCT__ "MatLoad_MPIBAIJ"
2675 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2676 {
2677   Mat            A;
2678   PetscErrorCode ierr;
2679   int            fd;
2680   PetscInt       i,nz,j,rstart,rend;
2681   PetscScalar    *vals,*buf;
2682   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2683   MPI_Status     status;
2684   PetscMPIInt    rank,size,maxnz;
2685   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2686   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2687   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2688   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2689   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2690   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2691 
2692   PetscFunctionBegin;
2693   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
2694     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2695   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2696 
2697   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2698   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2699   if (!rank) {
2700     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2701     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2702     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2703   }
2704 
2705   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2706   M = header[1]; N = header[2];
2707 
2708   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2709 
2710   /*
2711      This code adds extra rows to make sure the number of rows is
2712      divisible by the blocksize
2713   */
2714   Mbs        = M/bs;
2715   extra_rows = bs - M + bs*Mbs;
2716   if (extra_rows == bs) extra_rows = 0;
2717   else                  Mbs++;
2718   if (extra_rows && !rank) {
2719     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2720   }
2721 
2722   /* determine ownership of all rows */
2723   mbs        = Mbs/size + ((Mbs % size) > rank);
2724   m          = mbs*bs;
2725   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2726   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2727 
2728   /* process 0 needs enough room for the process with the most rows */
2729   if (!rank) {
2730     mmax = rowners[1];
2731     for (i=2; i<size; i++) {
2732       mmax = PetscMax(mmax,rowners[i]);
2733     }
2734     mmax*=bs;
2735   } else mmax = m;
2736 
2737   rowners[0] = 0;
2738   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2739   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2740   rstart = rowners[rank];
2741   rend   = rowners[rank+1];
2742 
2743   /* distribute row lengths to all processors */
2744   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2745   if (!rank) {
2746     mend = m;
2747     if (size == 1) mend = mend - extra_rows;
2748     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2749     for (j=mend; j<m; j++) locrowlens[j] = 1;
2750     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2751     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2752     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2753     for (j=0; j<m; j++) {
2754       procsnz[0] += locrowlens[j];
2755     }
2756     for (i=1; i<size; i++) {
2757       mend = browners[i+1] - browners[i];
2758       if (i == size-1) mend = mend - extra_rows;
2759       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2760       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2761       /* calculate the number of nonzeros on each processor */
2762       for (j=0; j<browners[i+1]-browners[i]; j++) {
2763         procsnz[i] += rowlengths[j];
2764       }
2765       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2766     }
2767     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2768   } else {
2769     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2770   }
2771 
2772   if (!rank) {
2773     /* determine max buffer needed and allocate it */
2774     maxnz = procsnz[0];
2775     for (i=1; i<size; i++) {
2776       maxnz = PetscMax(maxnz,procsnz[i]);
2777     }
2778     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2779 
2780     /* read in my part of the matrix column indices  */
2781     nz     = procsnz[0];
2782     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2783     mycols = ibuf;
2784     if (size == 1)  nz -= extra_rows;
2785     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2786     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2787 
2788     /* read in the parts for all other processors (except the last) and ship them off */
2789     for (i=1; i<size-1; i++) {
2790       nz   = procsnz[i];
2791       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2792       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2793     }
2794     /* read in the column indices for the last process */
2795     if (size != 1) {
2796       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2797       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2798       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2799       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2800     }
2801     ierr = PetscFree(cols);CHKERRQ(ierr);
2802   } else {
2803     /* determine buffer space needed for message */
2804     nz = 0;
2805     for (i=0; i<m; i++) {
2806       nz += locrowlens[i];
2807     }
2808     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2809     mycols = ibuf;
2810     /* receive the message of column indices */
2811     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2812     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2813     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"received fewer or more column indices than expected; file is likely corrupt");
2814   }
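  /* every process now holds its column indices, packed row by row, in mycols */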
2815 
2816   /* loop over local block rows, determining the number of diagonal and off-diagonal block entries */
2817   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2818   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2819   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2820   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2821   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
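  /* mask[] flags block columns already counted in the current block row;
     masked1[]/masked2[] record which flags were set (diagonal vs off-diagonal)
     so that only those entries need to be cleared for the next row */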
2822   rowcount = 0; nzcount = 0;
2823   for (i=0; i<mbs; i++) {
2824     dcount  = 0;
2825     odcount = 0;
2826     for (j=0; j<bs; j++) {
2827       kmax = locrowlens[rowcount];
2828       for (k=0; k<kmax; k++) {
2829         tmp = mycols[nzcount++]/bs;
2830         if (!mask[tmp]) {
2831           mask[tmp] = 1;
2832           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2833           else masked1[dcount++] = tmp;
2834         }
2835       }
2836       rowcount++;
2837     }
2838 
2839     dlens[i]  = dcount;
2840     odlens[i] = odcount;
2841 
2842     /* zero out the mask elements we set */
2843     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2844     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2845   }
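  /* dlens[i] and odlens[i] now hold the number of distinct diagonal and
     off-diagonal block columns in local block row i, as needed for exact
     preallocation below */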
2846 
2847   /* create our matrix */
2848   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2849   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2850   ierr = MatSetType(A,type);CHKERRQ(ierr);
2851   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
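  /* with exact per-block-row counts, no additional mallocs should occur while
     the values are inserted below */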
2852 
2853   if (!rank) {
2854     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2855     /* read in my part of the matrix numerical values  */
2856     nz = procsnz[0];
2857     vals = buf;
2858     mycols = ibuf;
2859     if (size == 1)  nz -= extra_rows;
2860     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2861     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
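    /* the padded rows get a single 1.0 on the diagonal, so the padding behaves like an identity block */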
2862 
2863     /* insert into matrix */
2864     jj      = rstart*bs;
2865     for (i=0; i<m; i++) {
2866       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2867       mycols += locrowlens[i];
2868       vals   += locrowlens[i];
2869       jj++;
2870     }
2871     /* read in the values for the other processes (except the last one) and ship them out */
2872     for (i=1; i<size-1; i++) {
2873       nz   = procsnz[i];
2874       vals = buf;
2875       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2876       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2877     }
2878     /* read in the values for the last process; the extra rows are not on the disk */
2879     if (size != 1) {
2880       nz   = procsnz[size-1] - extra_rows;
2881       vals = buf;
2882       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2883       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2884       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2885     }
2886     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2887   } else {
2888     /* receive numeric values */
2889     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2890 
2891     /* receive the message of values */
2892     vals   = buf;
2893     mycols = ibuf;
2894     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2895     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2896     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"received fewer or more values than expected; file is likely corrupt");
2897 
2898     /* insert into matrix */
2899     jj      = rstart*bs;
2900     for (i=0; i<m; i++) {
2901       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2902       mycols += locrowlens[i];
2903       vals   += locrowlens[i];
2904       jj++;
2905     }
2906   }
2907   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2908   ierr = PetscFree(buf);CHKERRQ(ierr);
2909   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2910   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2911   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2912   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2913   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2914   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2915 
2916   *newmat = A;
2917   PetscFunctionReturn(0);
2918 }
2919 
2920 #undef __FUNCT__
2921 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2922 /*@
2923    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2924 
2925    Collective on Mat
2926 
2927    Input Parameters:
2928 +  mat  - the matrix
2929 -  fact - the hash table factor
2930 
2931    Level: advanced
2932 
2933    Notes:
2934    This can also be set with the command line option: -mat_use_hash_table <fact>
2935 
2936 .keywords: matrix, hashtable, factor, HT
2937 
2938 .seealso: MatSetOption()
2939 @*/
2940 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2941 {
2942   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2943 
2944   PetscFunctionBegin;
2945   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2946   if (f) {
2947     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2948   }
2949   PetscFunctionReturn(0);
2950 }
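/* A minimal usage sketch (the factor 1.6 is an arbitrary example value; the
   hash-table insertion path itself is enabled with MatSetOption()):

     Mat mat;
     MatCreate(comm,&mat);
     MatSetType(mat,MATMPIBAIJ);
     MatMPIBAIJSetHashTableFactor(mat,1.6);
*/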
2951 
2952 EXTERN_C_BEGIN
2953 #undef __FUNCT__
2954 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2955 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2956 {
2957   Mat_MPIBAIJ *baij;
2958 
2959   PetscFunctionBegin;
2960   baij = (Mat_MPIBAIJ*)mat->data;
2961   baij->ht_fact = fact;
2962   PetscFunctionReturn(0);
2963 }
2964 EXTERN_C_END
2965 
2966 #undef __FUNCT__
2967 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
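/*
   MatMPIBAIJGetSeqBAIJ - Gives access to the pieces of an MPIBAIJ matrix:
   the diagonal part Ad (the block rows and block columns owned by this
   process), the off-diagonal part Ao, and colmap (garray), which maps the
   compressed local column numbering of Ao to global block-column indices.
*/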
2968 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2969 {
2970   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2971   PetscFunctionBegin;
2972   *Ad     = a->A;
2973   *Ao     = a->B;
2974   *colmap = a->garray;
2975   PetscFunctionReturn(0);
2976 }
2977