xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a4efd8ea4bb0600a5aef92b2c23cbfa314827193)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
15 
16 #undef __FUNCT__
17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
19 {
20   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       i,*idxb = 0;
23   PetscScalar    *va,*vb;
24   Vec            vtmp;
25 
26   PetscFunctionBegin;
27   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
28   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
29   if (idx) {
30     for (i=0; i<A->cmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++){
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
40   }
41 
42   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
43   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
44   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
45   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
49 EXTERN_C_BEGIN
50 #undef __FUNCT__
51 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
52 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
53 {
54   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
59   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
60   PetscFunctionReturn(0);
61 }
62 EXTERN_C_END
63 
64 EXTERN_C_BEGIN
65 #undef __FUNCT__
66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
67 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 EXTERN_C_END
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non scable way since the
83    length of colmap equals the global matrix length.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined (PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++){
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
109 #define CHUNKSIZE  10
110 
111 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
112 { \
113  \
114     brow = row/bs;  \
115     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
116     rmax = aimax[brow]; nrow = ailen[brow]; \
117       bcol = col/bs; \
118       ridx = row % bs; cidx = col % bs; \
119       low = 0; high = nrow; \
120       while (high-low > 3) { \
121         t = (low+high)/2; \
122         if (rp[t] > bcol) high = t; \
123         else              low  = t; \
124       } \
125       for (_i=low; _i<high; _i++) { \
126         if (rp[_i] > bcol) break; \
127         if (rp[_i] == bcol) { \
128           bap  = ap +  bs2*_i + bs*cidx + ridx; \
129           if (addv == ADD_VALUES) *bap += value;  \
130           else                    *bap  = value;  \
131           goto a_noinsert; \
132         } \
133       } \
134       if (a->nonew == 1) goto a_noinsert; \
135       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
136       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
137       N = nrow++ - 1;  \
138       /* shift up all the later entries in this row */ \
139       for (ii=N; ii>=_i; ii--) { \
140         rp[ii+1] = rp[ii]; \
141         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
142       } \
143       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
144       rp[_i]                      = bcol;  \
145       ap[bs2*_i + bs*cidx + ridx] = value;  \
146       a_noinsert:; \
147     ailen[brow] = nrow; \
148 }
149 
150 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
151 { \
152     brow = row/bs;  \
153     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
154     rmax = bimax[brow]; nrow = bilen[brow]; \
155       bcol = col/bs; \
156       ridx = row % bs; cidx = col % bs; \
157       low = 0; high = nrow; \
158       while (high-low > 3) { \
159         t = (low+high)/2; \
160         if (rp[t] > bcol) high = t; \
161         else              low  = t; \
162       } \
163       for (_i=low; _i<high; _i++) { \
164         if (rp[_i] > bcol) break; \
165         if (rp[_i] == bcol) { \
166           bap  = ap +  bs2*_i + bs*cidx + ridx; \
167           if (addv == ADD_VALUES) *bap += value;  \
168           else                    *bap  = value;  \
169           goto b_noinsert; \
170         } \
171       } \
172       if (b->nonew == 1) goto b_noinsert; \
173       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
174       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
175       CHKMEMQ;\
176       N = nrow++ - 1;  \
177       /* shift up all the later entries in this row */ \
178       for (ii=N; ii>=_i; ii--) { \
179         rp[ii+1] = rp[ii]; \
180         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
181       } \
182       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
183       rp[_i]                      = bcol;  \
184       ap[bs2*_i + bs*cidx + ridx] = value;  \
185       b_noinsert:; \
186     bilen[brow] = nrow; \
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "MatSetValues_MPIBAIJ"
191 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
192 {
193   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
194   MatScalar      value;
195   PetscTruth     roworiented = baij->roworiented;
196   PetscErrorCode ierr;
197   PetscInt       i,j,row,col;
198   PetscInt       rstart_orig=mat->rmap->rstart;
199   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
200   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
201 
202   /* Some Variables required in the macro */
203   Mat            A = baij->A;
204   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
205   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
206   MatScalar      *aa=a->a;
207 
208   Mat            B = baij->B;
209   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
210   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
211   MatScalar      *ba=b->a;
212 
213   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
214   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
215   MatScalar      *ap,*bap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_DEBUG)
221     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
222 #endif
223     if (im[i] >= rstart_orig && im[i] < rend_orig) {
224       row = im[i] - rstart_orig;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart_orig && in[j] < cend_orig){
227           col = in[j] - cstart_orig;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         } else if (in[j] < 0) continue;
232 #if defined(PETSC_USE_DEBUG)
233         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1);}
234 #endif
235         else {
236           if (mat->was_assembled) {
237             if (!baij->colmap) {
238               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
239             }
240 #if defined (PETSC_USE_CTABLE)
241             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
242             col  = col - 1;
243 #else
244             col = baij->colmap[in[j]/bs] - 1;
245 #endif
246             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
247               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
248               col =  in[j];
249               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
250               B = baij->B;
251               b = (Mat_SeqBAIJ*)(B)->data;
252               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
253               ba=b->a;
254             } else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (!baij->donotstash) {
263         if (roworiented) {
264           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
265         } else {
266           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
267         }
268       }
269     }
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
277 {
278   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
279   const PetscScalar *value;
280   MatScalar         *barray=baij->barray;
281   PetscTruth        roworiented = baij->roworiented;
282   PetscErrorCode    ierr;
283   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
284   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
285   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
286 
287   PetscFunctionBegin;
288   if(!barray) {
289     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
290     baij->barray = barray;
291   }
292 
293   if (roworiented) {
294     stepval = (n-1)*bs;
295   } else {
296     stepval = (m-1)*bs;
297   }
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If NumCol = 1 then a copy is not required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + i*(stepval+bs)*bs + j*bs;
314           } else {
315             value = v + j*(stepval+bs)*bs + i*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=stepval) {
318             for (jj=0; jj<bs; jj++) {
319               *barray++  = *value++;
320             }
321           }
322           barray -=bs2;
323         }
324 
325         if (in[j] >= cstart && in[j] < cend){
326           col  = in[j] - cstart;
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         }
329         else if (in[j] < 0) continue;
330 #if defined(PETSC_USE_DEBUG)
331         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
332 #endif
333         else {
334           if (mat->was_assembled) {
335             if (!baij->colmap) {
336               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
337             }
338 
339 #if defined(PETSC_USE_DEBUG)
340 #if defined (PETSC_USE_CTABLE)
341             { PetscInt data;
342               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
343               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
344             }
345 #else
346             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
347 #endif
348 #endif
349 #if defined (PETSC_USE_CTABLE)
350 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
351             col  = (col - 1)/bs;
352 #else
353             col = (baij->colmap[in[j]] - 1)/bs;
354 #endif
355             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
356               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
357               col =  in[j];
358             }
359           }
360           else col = in[j];
361           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
362         }
363       }
364     } else {
365       if (!baij->donotstash) {
366         if (roworiented) {
367           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         } else {
369           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         }
371       }
372     }
373   }
374   PetscFunctionReturn(0);
375 }
376 
377 #define HASH_KEY 0.6180339887
378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 #undef __FUNCT__
382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
384 {
385   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
386   PetscTruth     roworiented = baij->roworiented;
387   PetscErrorCode ierr;
388   PetscInt       i,j,row,col;
389   PetscInt       rstart_orig=mat->rmap->rstart;
390   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
391   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
392   PetscReal      tmp;
393   MatScalar      **HD = baij->hd,value;
394 #if defined(PETSC_USE_DEBUG)
395   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
396 #endif
397 
398   PetscFunctionBegin;
399 
400   for (i=0; i<m; i++) {
401 #if defined(PETSC_USE_DEBUG)
402     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
403     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
404 #endif
405       row = im[i];
406     if (row >= rstart_orig && row < rend_orig) {
407       for (j=0; j<n; j++) {
408         col = in[j];
409         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410         /* Look up PetscInto the Hash Table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
423             if (idx == h1) {
424               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
425             }
426           }
427         }
428 #else
429         if (HT[idx] != key) {
430           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
431           if (idx == size) {
432             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
433             if (idx == h1) {
434               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
435             }
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453 #if defined(PETSC_USE_DEBUG)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463 {
464   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465   PetscTruth        roworiented = baij->roworiented;
466   PetscErrorCode    ierr;
467   PetscInt          i,j,ii,jj,row,col;
468   PetscInt          rstart=baij->rstartbs;
469   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471   PetscReal         tmp;
472   MatScalar         **HD = baij->hd,*baij_a;
473   const PetscScalar *v_t,*value;
474 #if defined(PETSC_USE_DEBUG)
475   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476 #endif
477 
478   PetscFunctionBegin;
479 
480   if (roworiented) {
481     stepval = (n-1)*bs;
482   } else {
483     stepval = (m-1)*bs;
484   }
485   for (i=0; i<m; i++) {
486 #if defined(PETSC_USE_DEBUG)
487     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
488     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
489 #endif
490     row   = im[i];
491     v_t   = v + i*nbs2;
492     if (row >= rstart && row < rend) {
493       for (j=0; j<n; j++) {
494         col = in[j];
495 
496         /* Look up into the Hash Table */
497         key = row*Nbs+col+1;
498         h1  = HASH(size,key,tmp);
499 
500         idx = h1;
501 #if defined(PETSC_USE_DEBUG)
502         total_ct++;
503         insert_ct++;
504        if (HT[idx] != key) {
505           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
506           if (idx == size) {
507             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
508             if (idx == h1) {
509               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
510             }
511           }
512         }
513 #else
514         if (HT[idx] != key) {
515           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
516           if (idx == size) {
517             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
518             if (idx == h1) {
519               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
520             }
521           }
522         }
523 #endif
524         baij_a = HD[idx];
525         if (roworiented) {
526           /*value = v + i*(stepval+bs)*bs + j*bs;*/
527           /* value = v + (i*(stepval+bs)+j)*bs; */
528           value = v_t;
529           v_t  += bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval) {
532               for (jj=ii; jj<bs2; jj+=bs) {
533                 baij_a[jj]  += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval) {
538               for (jj=ii; jj<bs2; jj+=bs) {
539                 baij_a[jj]  = *value++;
540               }
541             }
542           }
543         } else {
544           value = v + j*(stepval+bs)*bs + i*bs;
545           if (addv == ADD_VALUES) {
546             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
547               for (jj=0; jj<bs; jj++) {
548                 baij_a[jj]  += *value++;
549               }
550             }
551           } else {
552             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
553               for (jj=0; jj<bs; jj++) {
554                 baij_a[jj]  = *value++;
555               }
556             }
557           }
558         }
559       }
560     } else {
561       if (!baij->donotstash) {
562         if (roworiented) {
563           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
564         } else {
565           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         }
567       }
568     }
569   }
570 #if defined(PETSC_USE_DEBUG)
571   baij->ht_total_ct = total_ct;
572   baij->ht_insert_ct = insert_ct;
573 #endif
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetValues_MPIBAIJ"
579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580 {
581   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
582   PetscErrorCode ierr;
583   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
584   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
585 
586   PetscFunctionBegin;
587   for (i=0; i<m; i++) {
588     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
591       row = idxm[i] - bsrstart;
592       for (j=0; j<n; j++) {
593         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595         if (idxn[j] >= bscstart && idxn[j] < bscend){
596           col = idxn[j] - bscstart;
597           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598         } else {
599           if (!baij->colmap) {
600             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
601           }
602 #if defined (PETSC_USE_CTABLE)
603           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
604           data --;
605 #else
606           data = baij->colmap[idxn[j]/bs]-1;
607 #endif
608           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
609           else {
610             col  = data + idxn[j]%bs;
611             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
612           }
613         }
614       }
615     } else {
616       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
617     }
618   }
619  PetscFunctionReturn(0);
620 }
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "MatNorm_MPIBAIJ"
624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
625 {
626   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
627   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
628   PetscErrorCode ierr;
629   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
630   PetscReal      sum = 0.0;
631   MatScalar      *v;
632 
633   PetscFunctionBegin;
634   if (baij->size == 1) {
635     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
636   } else {
637     if (type == NORM_FROBENIUS) {
638       v = amat->a;
639       nz = amat->nz*bs2;
640       for (i=0; i<nz; i++) {
641 #if defined(PETSC_USE_COMPLEX)
642         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
643 #else
644         sum += (*v)*(*v); v++;
645 #endif
646       }
647       v = bmat->a;
648       nz = bmat->nz*bs2;
649       for (i=0; i<nz; i++) {
650 #if defined(PETSC_USE_COMPLEX)
651         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
652 #else
653         sum += (*v)*(*v); v++;
654 #endif
655       }
656       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
657       *nrm = sqrt(*nrm);
658     } else if (type == NORM_1) { /* max column sum */
659       PetscReal *tmp,*tmp2;
660       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
661       ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
662       tmp2 = tmp + mat->cmap->N;
663       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
664       v = amat->a; jj = amat->j;
665       for (i=0; i<amat->nz; i++) {
666         for (j=0; j<bs; j++){
667           col = bs*(cstart + *jj) + j; /* column index */
668           for (row=0; row<bs; row++){
669             tmp[col] += PetscAbsScalar(*v);  v++;
670           }
671         }
672         jj++;
673       }
674       v = bmat->a; jj = bmat->j;
675       for (i=0; i<bmat->nz; i++) {
676         for (j=0; j<bs; j++){
677           col = bs*garray[*jj] + j;
678           for (row=0; row<bs; row++){
679             tmp[col] += PetscAbsScalar(*v); v++;
680           }
681         }
682         jj++;
683       }
684       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
685       *nrm = 0.0;
686       for (j=0; j<mat->cmap->N; j++) {
687         if (tmp2[j] > *nrm) *nrm = tmp2[j];
688       }
689       ierr = PetscFree(tmp);CHKERRQ(ierr);
690     } else if (type == NORM_INFINITY) { /* max row sum */
691       PetscReal *sums;
692       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
693       sum = 0.0;
694       for (j=0; j<amat->mbs; j++) {
695         for (row=0; row<bs; row++) sums[row] = 0.0;
696         v = amat->a + bs2*amat->i[j];
697         nz = amat->i[j+1]-amat->i[j];
698         for (i=0; i<nz; i++) {
699           for (col=0; col<bs; col++){
700             for (row=0; row<bs; row++){
701               sums[row] += PetscAbsScalar(*v); v++;
702             }
703           }
704         }
705         v = bmat->a + bs2*bmat->i[j];
706         nz = bmat->i[j+1]-bmat->i[j];
707         for (i=0; i<nz; i++) {
708           for (col=0; col<bs; col++){
709             for (row=0; row<bs; row++){
710               sums[row] += PetscAbsScalar(*v); v++;
711             }
712           }
713         }
714         for (row=0; row<bs; row++){
715           if (sums[row] > sum) sum = sums[row];
716         }
717       }
718       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
719       ierr = PetscFree(sums);CHKERRQ(ierr);
720     } else {
721       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
722     }
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 /*
728   Creates the hash table, and sets the table
729   This table is created only once.
730   If new entried need to be added to the matrix
731   then the hash table has to be destroyed and
732   recreated.
733 */
734 #undef __FUNCT__
735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
737 {
738   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
739   Mat            A = baij->A,B=baij->B;
740   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
741   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
742   PetscErrorCode ierr;
743   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
744   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
745   PetscInt       *HT,key;
746   MatScalar      **HD;
747   PetscReal      tmp;
748 #if defined(PETSC_USE_INFO)
749   PetscInt       ct=0,max=0;
750 #endif
751 
752   PetscFunctionBegin;
753   baij->ht_size=(PetscInt)(factor*nz);
754   size = baij->ht_size;
755 
756   if (baij->ht) {
757     PetscFunctionReturn(0);
758   }
759 
760   /* Allocate Memory for Hash Table */
761   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
762   baij->ht = (PetscInt*)(baij->hd + size);
763   HD       = baij->hd;
764   HT       = baij->ht;
765 
766 
767   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
768 
769 
770   /* Loop Over A */
771   for (i=0; i<a->mbs; i++) {
772     for (j=ai[i]; j<ai[i+1]; j++) {
773       row = i+rstart;
774       col = aj[j]+cstart;
775 
776       key = row*Nbs + col + 1;
777       h1  = HASH(size,key,tmp);
778       for (k=0; k<size; k++){
779         if (!HT[(h1+k)%size]) {
780           HT[(h1+k)%size] = key;
781           HD[(h1+k)%size] = a->a + j*bs2;
782           break;
783 #if defined(PETSC_USE_INFO)
784         } else {
785           ct++;
786 #endif
787         }
788       }
789 #if defined(PETSC_USE_INFO)
790       if (k> max) max = k;
791 #endif
792     }
793   }
794   /* Loop Over B */
795   for (i=0; i<b->mbs; i++) {
796     for (j=bi[i]; j<bi[i+1]; j++) {
797       row = i+rstart;
798       col = garray[bj[j]];
799       key = row*Nbs + col + 1;
800       h1  = HASH(size,key,tmp);
801       for (k=0; k<size; k++){
802         if (!HT[(h1+k)%size]) {
803           HT[(h1+k)%size] = key;
804           HD[(h1+k)%size] = b->a + j*bs2;
805           break;
806 #if defined(PETSC_USE_INFO)
807         } else {
808           ct++;
809 #endif
810         }
811       }
812 #if defined(PETSC_USE_INFO)
813       if (k> max) max = k;
814 #endif
815     }
816   }
817 
818   /* Print Summary */
819 #if defined(PETSC_USE_INFO)
820   for (i=0,j=0; i<size; i++) {
821     if (HT[i]) {j++;}
822   }
823   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
824 #endif
825   PetscFunctionReturn(0);
826 }
827 
828 #undef __FUNCT__
829 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
830 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
831 {
832   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
833   PetscErrorCode ierr;
834   PetscInt       nstash,reallocs;
835   InsertMode     addv;
836 
837   PetscFunctionBegin;
838   if (baij->donotstash) {
839     PetscFunctionReturn(0);
840   }
841 
842   /* make sure all processors are either in INSERTMODE or ADDMODE */
843   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
844   if (addv == (ADD_VALUES|INSERT_VALUES)) {
845     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
846   }
847   mat->insertmode = addv; /* in case this processor had no cache */
848 
849   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
850   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
851   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
852   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
853   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
854   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
855   PetscFunctionReturn(0);
856 }
857 
858 #undef __FUNCT__
859 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
860 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
861 {
862   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
863   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
864   PetscErrorCode ierr;
865   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
866   PetscInt       *row,*col;
867   PetscTruth     r1,r2,r3,other_disassembled;
868   MatScalar      *val;
869   InsertMode     addv = mat->insertmode;
870   PetscMPIInt    n;
871 
872   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
873   PetscFunctionBegin;
874   if (!baij->donotstash) {
875     while (1) {
876       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
877       if (!flg) break;
878 
879       for (i=0; i<n;) {
880         /* Now identify the consecutive vals belonging to the same row */
881         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
882         if (j < n) ncols = j-i;
883         else       ncols = n-i;
884         /* Now assemble all these values with a single function call */
885         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
886         i = j;
887       }
888     }
889     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
890     /* Now process the block-stash. Since the values are stashed column-oriented,
891        set the roworiented flag to column oriented, and after MatSetValues()
892        restore the original flags */
893     r1 = baij->roworiented;
894     r2 = a->roworiented;
895     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
896     baij->roworiented = PETSC_FALSE;
897     a->roworiented    = PETSC_FALSE;
898     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
899     while (1) {
900       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
901       if (!flg) break;
902 
903       for (i=0; i<n;) {
904         /* Now identify the consecutive vals belonging to the same row */
905         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
906         if (j < n) ncols = j-i;
907         else       ncols = n-i;
908         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
909         i = j;
910       }
911     }
912     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
913     baij->roworiented = r1;
914     a->roworiented    = r2;
915     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
916   }
917 
918   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
919   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
920 
921   /* determine if any processor has disassembled, if so we must
922      also disassemble ourselfs, in order that we may reassemble. */
923   /*
924      if nonzero structure of submatrix B cannot change then we know that
925      no processor disassembled thus we can skip this stuff
926   */
927   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
928     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
929     if (mat->was_assembled && !other_disassembled) {
930       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
931     }
932   }
933 
934   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
935     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
936   }
937   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
938   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
939   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
940 
941 #if defined(PETSC_USE_INFO)
942   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
943     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
944     baij->ht_total_ct  = 0;
945     baij->ht_insert_ct = 0;
946   }
947 #endif
948   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
949     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
950     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
951     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
952   }
953 
954   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
955   baij->rowvalues = 0;
956   PetscFunctionReturn(0);
957 }
958 
959 #undef __FUNCT__
960 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
961 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
962 {
963   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
964   PetscErrorCode    ierr;
965   PetscMPIInt       size = baij->size,rank = baij->rank;
966   PetscInt          bs = mat->rmap->bs;
967   PetscTruth        iascii,isdraw;
968   PetscViewer       sviewer;
969   PetscViewerFormat format;
970 
971   PetscFunctionBegin;
972   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
973   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
974   if (iascii) {
975     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
976     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
977       MatInfo info;
978       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
979       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
980       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
981               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
982               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
983       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
984       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
985       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
986       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
987       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
988       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
989       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
990       PetscFunctionReturn(0);
991     } else if (format == PETSC_VIEWER_ASCII_INFO) {
992       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
993       PetscFunctionReturn(0);
994     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
995       PetscFunctionReturn(0);
996     }
997   }
998 
999   if (isdraw) {
1000     PetscDraw       draw;
1001     PetscTruth isnull;
1002     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1003     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1004   }
1005 
1006   if (size == 1) {
1007     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1008     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1009   } else {
1010     /* assemble the entire matrix onto first processor. */
1011     Mat         A;
1012     Mat_SeqBAIJ *Aloc;
1013     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1014     MatScalar   *a;
1015 
1016     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1017     /* Perhaps this should be the type of mat? */
1018     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1019     if (!rank) {
1020       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1021     } else {
1022       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1023     }
1024     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1025     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1026     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1027 
1028     /* copy over the A part */
1029     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1030     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1031     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1032 
1033     for (i=0; i<mbs; i++) {
1034       rvals[0] = bs*(baij->rstartbs + i);
1035       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1036       for (j=ai[i]; j<ai[i+1]; j++) {
1037         col = (baij->cstartbs+aj[j])*bs;
1038         for (k=0; k<bs; k++) {
1039           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1040           col++; a += bs;
1041         }
1042       }
1043     }
1044     /* copy over the B part */
1045     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1046     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1047     for (i=0; i<mbs; i++) {
1048       rvals[0] = bs*(baij->rstartbs + i);
1049       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1050       for (j=ai[i]; j<ai[i+1]; j++) {
1051         col = baij->garray[aj[j]]*bs;
1052         for (k=0; k<bs; k++) {
1053           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1054           col++; a += bs;
1055         }
1056       }
1057     }
1058     ierr = PetscFree(rvals);CHKERRQ(ierr);
1059     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1060     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1061     /*
1062        Everyone has to call to draw the matrix since the graphics waits are
1063        synchronized across all processors that share the PetscDraw object
1064     */
1065     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1066     if (!rank) {
1067       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1068       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1069     }
1070     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1071     ierr = MatDestroy(A);CHKERRQ(ierr);
1072   }
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatView_MPIBAIJ"
1078 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1079 {
1080   PetscErrorCode ierr;
1081   PetscTruth     iascii,isdraw,issocket,isbinary;
1082 
1083   PetscFunctionBegin;
1084   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1085   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1086   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1087   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1088   if (iascii || isdraw || issocket || isbinary) {
1089     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1090   } else {
1091     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1098 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1099 {
1100   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104 #if defined(PETSC_USE_LOG)
1105   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1106 #endif
1107   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1108   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1109   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1110   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1111 #if defined (PETSC_USE_CTABLE)
1112   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1113 #else
1114   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1115 #endif
1116   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1117   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1118   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1119   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1120   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1121   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1122   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1123   ierr = PetscFree(baij);CHKERRQ(ierr);
1124 
1125   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatMult_MPIBAIJ"
1138 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1139 {
1140   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1141   PetscErrorCode ierr;
1142   PetscInt       nt;
1143 
1144   PetscFunctionBegin;
1145   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1146   if (nt != A->cmap->n) {
1147     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1148   }
1149   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1150   if (nt != A->rmap->n) {
1151     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1152   }
1153   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1154   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1155   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1156   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1162 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1163 {
1164   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1169   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1170   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1171   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1177 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1178 {
1179   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1180   PetscErrorCode ierr;
1181   PetscTruth     merged;
1182 
1183   PetscFunctionBegin;
1184   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1185   /* do nondiagonal part */
1186   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1187   if (!merged) {
1188     /* send it on its way */
1189     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1190     /* do local part */
1191     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1192     /* receive remote parts: note this assumes the values are not actually */
1193     /* inserted in yy until the next line */
1194     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1195   } else {
1196     /* do local part */
1197     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1198     /* send it on its way */
1199     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1200     /* values actually were received in the Begin() but we need to call this nop */
1201     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1202   }
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 #undef __FUNCT__
1207 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1208 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1209 {
1210   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   /* do nondiagonal part */
1215   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1216   /* send it on its way */
1217   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1218   /* do local part */
1219   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1220   /* receive remote parts: note this assumes the values are not actually */
1221   /* inserted in yy until the next line, which is true for my implementation*/
1222   /* but is not perhaps always true. */
1223   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 /*
1228   This only works correctly for square matrices where the subblock A->A is the
1229    diagonal block
1230 */
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1233 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1234 {
1235   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatScale_MPIBAIJ"
1246 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1247 {
1248   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1253   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1259 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1260 {
1261   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1262   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1263   PetscErrorCode ierr;
1264   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1265   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1266   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1267 
1268   PetscFunctionBegin;
1269   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1270   mat->getrowactive = PETSC_TRUE;
1271 
1272   if (!mat->rowvalues && (idx || v)) {
1273     /*
1274         allocate enough space to hold information from the longest row.
1275     */
1276     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1277     PetscInt     max = 1,mbs = mat->mbs,tmp;
1278     for (i=0; i<mbs; i++) {
1279       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1280       if (max < tmp) { max = tmp; }
1281     }
1282     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1283     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1284   }
1285 
1286   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1287   lrow = row - brstart;
1288 
1289   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1290   if (!v)   {pvA = 0; pvB = 0;}
1291   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1292   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1293   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1294   nztot = nzA + nzB;
1295 
1296   cmap  = mat->garray;
1297   if (v  || idx) {
1298     if (nztot) {
1299       /* Sort by increasing column numbers, assuming A and B already sorted */
1300       PetscInt imark = -1;
1301       if (v) {
1302         *v = v_p = mat->rowvalues;
1303         for (i=0; i<nzB; i++) {
1304           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1305           else break;
1306         }
1307         imark = i;
1308         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1309         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1310       }
1311       if (idx) {
1312         *idx = idx_p = mat->rowindices;
1313         if (imark > -1) {
1314           for (i=0; i<imark; i++) {
1315             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1316           }
1317         } else {
1318           for (i=0; i<nzB; i++) {
1319             if (cmap[cworkB[i]/bs] < cstart)
1320               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1321             else break;
1322           }
1323           imark = i;
1324         }
1325         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1326         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1327       }
1328     } else {
1329       if (idx) *idx = 0;
1330       if (v)   *v   = 0;
1331     }
1332   }
1333   *nz = nztot;
1334   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1335   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1341 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!baij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1348   }
1349   baij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1355 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1356 {
1357   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1358   PetscErrorCode ierr;
1359 
1360   PetscFunctionBegin;
1361   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1362   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1368 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1371   Mat            A = a->A,B = a->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
1376   info->block_size     = (PetscReal)matin->rmap->bs;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else {
1404     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1405   }
1406   info->rows_global       = (PetscReal)A->rmap->N;
1407   info->columns_global    = (PetscReal)A->cmap->N;
1408   info->rows_local        = (PetscReal)A->rmap->N;
1409   info->columns_local     = (PetscReal)A->cmap->N;
1410   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1411   info->fill_ratio_needed = 0;
1412   info->factor_mallocs    = 0;
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1418 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1419 {
1420   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1421   PetscErrorCode ierr;
1422 
1423   PetscFunctionBegin;
1424   switch (op) {
1425   case MAT_NEW_NONZERO_LOCATIONS:
1426   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1427   case MAT_KEEP_ZEROED_ROWS:
1428   case MAT_NEW_NONZERO_LOCATION_ERR:
1429     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1430     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1431     break;
1432   case MAT_ROW_ORIENTED:
1433     a->roworiented = flg;
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_NEW_DIAGONALS:
1438     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1439     break;
1440   case MAT_IGNORE_OFF_PROC_ENTRIES:
1441     a->donotstash = flg;
1442     break;
1443   case MAT_USE_HASH_TABLE:
1444     a->ht_flag = flg;
1445     break;
1446   case MAT_SYMMETRIC:
1447   case MAT_STRUCTURALLY_SYMMETRIC:
1448   case MAT_HERMITIAN:
1449   case MAT_SYMMETRY_ETERNAL:
1450     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1451     break;
1452   default:
1453     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1460 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1461 {
1462   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1463   Mat_SeqBAIJ    *Aloc;
1464   Mat            B;
1465   PetscErrorCode ierr;
1466   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1467   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1468   MatScalar      *a;
1469 
1470   PetscFunctionBegin;
1471   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1472   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1473     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1474     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1475     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1476     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1477   } else {
1478     B = *matout;
1479   }
1480 
1481   /* copy over the A part */
1482   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1483   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1484   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1485 
1486   for (i=0; i<mbs; i++) {
1487     rvals[0] = bs*(baij->rstartbs + i);
1488     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1489     for (j=ai[i]; j<ai[i+1]; j++) {
1490       col = (baij->cstartbs+aj[j])*bs;
1491       for (k=0; k<bs; k++) {
1492         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1493         col++; a += bs;
1494       }
1495     }
1496   }
1497   /* copy over the B part */
1498   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1499   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1500   for (i=0; i<mbs; i++) {
1501     rvals[0] = bs*(baij->rstartbs + i);
1502     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1503     for (j=ai[i]; j<ai[i+1]; j++) {
1504       col = baij->garray[aj[j]]*bs;
1505       for (k=0; k<bs; k++) {
1506         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1507         col++; a += bs;
1508       }
1509     }
1510   }
1511   ierr = PetscFree(rvals);CHKERRQ(ierr);
1512   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1513   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1514 
1515   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1516     *matout = B;
1517   } else {
1518     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1519   }
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 #undef __FUNCT__
1524 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1525 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1526 {
1527   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1528   Mat            a = baij->A,b = baij->B;
1529   PetscErrorCode ierr;
1530   PetscInt       s1,s2,s3;
1531 
1532   PetscFunctionBegin;
1533   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1534   if (rr) {
1535     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1536     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1537     /* Overlap communication with computation. */
1538     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539   }
1540   if (ll) {
1541     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1542     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1543     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1544   }
1545   /* scale  the diagonal block */
1546   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1547 
1548   if (rr) {
1549     /* Do a scatter end and then right scale the off-diagonal block */
1550     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1552   }
1553 
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1559 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1560 {
1561   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1562   PetscErrorCode ierr;
1563   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1564   PetscInt       i,*owners = A->rmap->range;
1565   PetscInt       *nprocs,j,idx,nsends,row;
1566   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1567   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1568   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1569   MPI_Comm       comm = ((PetscObject)A)->comm;
1570   MPI_Request    *send_waits,*recv_waits;
1571   MPI_Status     recv_status,*send_status;
1572 #if defined(PETSC_DEBUG)
1573   PetscTruth     found = PETSC_FALSE;
1574 #endif
1575 
1576   PetscFunctionBegin;
1577   /*  first count number of contributors to each processor */
1578   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1579   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1580   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1581   j = 0;
1582   for (i=0; i<N; i++) {
1583     if (lastidx > (idx = rows[i])) j = 0;
1584     lastidx = idx;
1585     for (; j<size; j++) {
1586       if (idx >= owners[j] && idx < owners[j+1]) {
1587         nprocs[2*j]++;
1588         nprocs[2*j+1] = 1;
1589         owner[i] = j;
1590 #if defined(PETSC_DEBUG)
1591         found = PETSC_TRUE;
1592 #endif
1593         break;
1594       }
1595     }
1596 #if defined(PETSC_DEBUG)
1597     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1598     found = PETSC_FALSE;
1599 #endif
1600   }
1601   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1602 
1603   /* inform other processors of number of messages and max length*/
1604   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1605 
1606   /* post receives:   */
1607   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1608   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1609   for (i=0; i<nrecvs; i++) {
1610     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1611   }
1612 
1613   /* do sends:
1614      1) starts[i] gives the starting index in svalues for stuff going to
1615      the ith processor
1616   */
1617   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1618   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1619   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1620   starts[0]  = 0;
1621   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1622   for (i=0; i<N; i++) {
1623     svalues[starts[owner[i]]++] = rows[i];
1624   }
1625 
1626   starts[0] = 0;
1627   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1628   count = 0;
1629   for (i=0; i<size; i++) {
1630     if (nprocs[2*i+1]) {
1631       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1632     }
1633   }
1634   ierr = PetscFree(starts);CHKERRQ(ierr);
1635 
1636   base = owners[rank];
1637 
1638   /*  wait on receives */
1639   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1640   source = lens + nrecvs;
1641   count  = nrecvs; slen = 0;
1642   while (count) {
1643     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1644     /* unpack receives into our local space */
1645     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1646     source[imdex]  = recv_status.MPI_SOURCE;
1647     lens[imdex]    = n;
1648     slen          += n;
1649     count--;
1650   }
1651   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1652 
1653   /* move the data into the send scatter */
1654   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1655   count = 0;
1656   for (i=0; i<nrecvs; i++) {
1657     values = rvalues + i*nmax;
1658     for (j=0; j<lens[i]; j++) {
1659       lrows[count++] = values[j] - base;
1660     }
1661   }
1662   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1663   ierr = PetscFree(lens);CHKERRQ(ierr);
1664   ierr = PetscFree(owner);CHKERRQ(ierr);
1665   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1666 
1667   /* actually zap the local rows */
1668   /*
1669         Zero the required rows. If the "diagonal block" of the matrix
1670      is square and the user wishes to set the diagonal we use separate
1671      code so that MatSetValues() is not called for each diagonal allocating
1672      new memory, thus calling lots of mallocs and slowing things down.
1673 
1674        Contributed by: Matthew Knepley
1675   */
1676   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1677   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1678   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1679     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1680   } else if (diag != 0.0) {
1681     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1682     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1683       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1684 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1685     }
1686     for (i=0; i<slen; i++) {
1687       row  = lrows[i] + rstart_bs;
1688       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1689     }
1690     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692   } else {
1693     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1694   }
1695 
1696   ierr = PetscFree(lrows);CHKERRQ(ierr);
1697 
1698   /* wait on sends */
1699   if (nsends) {
1700     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1701     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1702     ierr = PetscFree(send_status);CHKERRQ(ierr);
1703   }
1704   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1705   ierr = PetscFree(svalues);CHKERRQ(ierr);
1706 
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1712 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1713 {
1714   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1715   PetscErrorCode ierr;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1723 
1724 #undef __FUNCT__
1725 #define __FUNCT__ "MatEqual_MPIBAIJ"
1726 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1727 {
1728   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1729   Mat            a,b,c,d;
1730   PetscTruth     flg;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   a = matA->A; b = matA->B;
1735   c = matB->A; d = matB->B;
1736 
1737   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1738   if (flg) {
1739     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1740   }
1741   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatCopy_MPIBAIJ"
1747 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1748 {
1749   PetscErrorCode ierr;
1750   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1751   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1752 
1753   PetscFunctionBegin;
1754   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1755   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1756     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1757   } else {
1758     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1759     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1760   }
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1766 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1767 {
1768   PetscErrorCode ierr;
1769 
1770   PetscFunctionBegin;
1771   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 #include "petscblaslapack.h"
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1778 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1779 {
1780   PetscErrorCode ierr;
1781   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1782   PetscBLASInt   bnz,one=1;
1783   Mat_SeqBAIJ    *x,*y;
1784 
1785   PetscFunctionBegin;
1786   if (str == SAME_NONZERO_PATTERN) {
1787     PetscScalar alpha = a;
1788     x = (Mat_SeqBAIJ *)xx->A->data;
1789     y = (Mat_SeqBAIJ *)yy->A->data;
1790     bnz = PetscBLASIntCast(x->nz);
1791     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1792     x = (Mat_SeqBAIJ *)xx->B->data;
1793     y = (Mat_SeqBAIJ *)yy->B->data;
1794     bnz = PetscBLASIntCast(x->nz);
1795     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1796   } else {
1797     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1798   }
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1804 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1805 {
1806   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1807   PetscErrorCode ierr;
1808 
1809   PetscFunctionBegin;
1810   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1811   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1817 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1818 {
1819   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1820   PetscErrorCode ierr;
1821 
1822   PetscFunctionBegin;
1823   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1824   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 /* -------------------------------------------------------------------*/
1829 static struct _MatOps MatOps_Values = {
1830        MatSetValues_MPIBAIJ,
1831        MatGetRow_MPIBAIJ,
1832        MatRestoreRow_MPIBAIJ,
1833        MatMult_MPIBAIJ,
1834 /* 4*/ MatMultAdd_MPIBAIJ,
1835        MatMultTranspose_MPIBAIJ,
1836        MatMultTransposeAdd_MPIBAIJ,
1837        0,
1838        0,
1839        0,
1840 /*10*/ 0,
1841        0,
1842        0,
1843        0,
1844        MatTranspose_MPIBAIJ,
1845 /*15*/ MatGetInfo_MPIBAIJ,
1846        MatEqual_MPIBAIJ,
1847        MatGetDiagonal_MPIBAIJ,
1848        MatDiagonalScale_MPIBAIJ,
1849        MatNorm_MPIBAIJ,
1850 /*20*/ MatAssemblyBegin_MPIBAIJ,
1851        MatAssemblyEnd_MPIBAIJ,
1852        0,
1853        MatSetOption_MPIBAIJ,
1854        MatZeroEntries_MPIBAIJ,
1855 /*25*/ MatZeroRows_MPIBAIJ,
1856        0,
1857        0,
1858        0,
1859        0,
1860 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1861        0,
1862        0,
1863        0,
1864        0,
1865 /*35*/ MatDuplicate_MPIBAIJ,
1866        0,
1867        0,
1868        0,
1869        0,
1870 /*40*/ MatAXPY_MPIBAIJ,
1871        MatGetSubMatrices_MPIBAIJ,
1872        MatIncreaseOverlap_MPIBAIJ,
1873        MatGetValues_MPIBAIJ,
1874        MatCopy_MPIBAIJ,
1875 /*45*/ 0,
1876        MatScale_MPIBAIJ,
1877        0,
1878        0,
1879        0,
1880 /*50*/ 0,
1881        0,
1882        0,
1883        0,
1884        0,
1885 /*55*/ 0,
1886        0,
1887        MatSetUnfactored_MPIBAIJ,
1888        0,
1889        MatSetValuesBlocked_MPIBAIJ,
1890 /*60*/ 0,
1891        MatDestroy_MPIBAIJ,
1892        MatView_MPIBAIJ,
1893        0,
1894        0,
1895 /*65*/ 0,
1896        0,
1897        0,
1898        0,
1899        0,
1900 /*70*/ MatGetRowMaxAbs_MPIBAIJ,
1901        0,
1902        0,
1903        0,
1904        0,
1905 /*75*/ 0,
1906        0,
1907        0,
1908        0,
1909        0,
1910 /*80*/ 0,
1911        0,
1912        0,
1913        0,
1914        MatLoad_MPIBAIJ,
1915 /*85*/ 0,
1916        0,
1917        0,
1918        0,
1919        0,
1920 /*90*/ 0,
1921        0,
1922        0,
1923        0,
1924        0,
1925 /*95*/ 0,
1926        0,
1927        0,
1928        0,
1929        0,
1930 /*100*/0,
1931        0,
1932        0,
1933        0,
1934        0,
1935 /*105*/0,
1936        MatRealPart_MPIBAIJ,
1937        MatImaginaryPart_MPIBAIJ};
1938 
1939 
1940 EXTERN_C_BEGIN
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1943 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1944 {
1945   PetscFunctionBegin;
1946   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1947   *iscopy = PETSC_FALSE;
1948   PetscFunctionReturn(0);
1949 }
1950 EXTERN_C_END
1951 
1952 EXTERN_C_BEGIN
1953 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
1954 EXTERN_C_END
1955 
1956 EXTERN_C_BEGIN
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
1959 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1960 {
1961   PetscInt       m,rstart,cstart,cend;
1962   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1963   const PetscInt *JJ=0;
1964   PetscScalar    *values=0;
1965   PetscErrorCode ierr;
1966 
1967   PetscFunctionBegin;
1968 
1969   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1970   B->rmap->bs = bs;
1971   B->cmap->bs = bs;
1972   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
1973   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
1974   m      = B->rmap->n/bs;
1975   rstart = B->rmap->rstart/bs;
1976   cstart = B->cmap->rstart/bs;
1977   cend   = B->cmap->rend/bs;
1978 
1979   if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1980   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1981   o_nnz = d_nnz + m;
1982   for (i=0; i<m; i++) {
1983     nz = ii[i+1] - ii[i];
1984     if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1985     nz_max = PetscMax(nz_max,nz);
1986     JJ  = jj + ii[i];
1987     for (j=0; j<nz; j++) {
1988       if (*JJ >= cstart) break;
1989       JJ++;
1990     }
1991     d = 0;
1992     for (; j<nz; j++) {
1993       if (*JJ++ >= cend) break;
1994       d++;
1995     }
1996     d_nnz[i] = d;
1997     o_nnz[i] = nz - d;
1998   }
1999   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2000   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2001 
2002   values = (PetscScalar*)V;
2003   if (!values) {
2004     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2005     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2006   }
2007   for (i=0; i<m; i++) {
2008     PetscInt          row    = i + rstart;
2009     PetscInt          ncols  = ii[i+1] - ii[i];
2010     const PetscInt    *icols = jj + ii[i];
2011     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2012     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2013   }
2014 
2015   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2016   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2017   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2018 
2019   PetscFunctionReturn(0);
2020 }
2021 EXTERN_C_END
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2025 /*@C
2026    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2027    (the default parallel PETSc format).
2028 
2029    Collective on MPI_Comm
2030 
2031    Input Parameters:
2032 +  A - the matrix
2033 .  i - the indices into j for the start of each local row (starts with zero)
2034 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2035 -  v - optional values in the matrix
2036 
2037    Level: developer
2038 
2039 .keywords: matrix, aij, compressed row, sparse, parallel
2040 
2041 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2042 @*/
2043 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2044 {
2045   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2046 
2047   PetscFunctionBegin;
2048   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2049   if (f) {
2050     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2051   }
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 EXTERN_C_BEGIN
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2058 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2059 {
2060   Mat_MPIBAIJ    *b;
2061   PetscErrorCode ierr;
2062   PetscInt       i, newbs = PetscAbs(bs);
2063 
2064   PetscFunctionBegin;
2065   B->preallocated = PETSC_TRUE;
2066   if (bs < 0) {
2067     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2068       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2069     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2070     bs   = PetscAbs(bs);
2071   }
2072   if ((d_nnz || o_nnz) && newbs != bs) {
2073     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2074   }
2075   bs = newbs;
2076 
2077 
2078   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2079   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2080   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2081   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2082   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2083 
2084   B->rmap->bs  = bs;
2085   B->cmap->bs  = bs;
2086   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2087   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2088 
2089   if (d_nnz) {
2090     for (i=0; i<B->rmap->n/bs; i++) {
2091       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2092     }
2093   }
2094   if (o_nnz) {
2095     for (i=0; i<B->rmap->n/bs; i++) {
2096       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2097     }
2098   }
2099 
2100   b = (Mat_MPIBAIJ*)B->data;
2101   b->bs2 = bs*bs;
2102   b->mbs = B->rmap->n/bs;
2103   b->nbs = B->cmap->n/bs;
2104   b->Mbs = B->rmap->N/bs;
2105   b->Nbs = B->cmap->N/bs;
2106 
2107   for (i=0; i<=b->size; i++) {
2108     b->rangebs[i] = B->rmap->range[i]/bs;
2109   }
2110   b->rstartbs = B->rmap->rstart/bs;
2111   b->rendbs   = B->rmap->rend/bs;
2112   b->cstartbs = B->cmap->rstart/bs;
2113   b->cendbs   = B->cmap->rend/bs;
2114 
2115   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2116   ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2117   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2118   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2119   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2120   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2121   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2122   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2123   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2124   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2125 
2126   ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2127 
2128   PetscFunctionReturn(0);
2129 }
2130 EXTERN_C_END
2131 
2132 EXTERN_C_BEGIN
2133 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2134 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2135 EXTERN_C_END
2136 
2137 /*MC
2138    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2139 
2140    Options Database Keys:
2141 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2142 . -mat_block_size <bs> - set the blocksize used to store the matrix
2143 - -mat_use_hash_table <fact>
2144 
2145   Level: beginner
2146 
2147 .seealso: MatCreateMPIBAIJ
2148 M*/
2149 
2150 EXTERN_C_BEGIN
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatCreate_MPIBAIJ"
2153 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2154 {
2155   Mat_MPIBAIJ    *b;
2156   PetscErrorCode ierr;
2157   PetscTruth     flg;
2158 
2159   PetscFunctionBegin;
2160   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2161   B->data = (void*)b;
2162 
2163 
2164   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2165   B->mapping    = 0;
2166   B->assembled  = PETSC_FALSE;
2167 
2168   B->insertmode = NOT_SET_VALUES;
2169   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2170   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2171 
2172   /* build local table of row and column ownerships */
2173   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2174 
2175   /* build cache for off array entries formed */
2176   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2177   b->donotstash  = PETSC_FALSE;
2178   b->colmap      = PETSC_NULL;
2179   b->garray      = PETSC_NULL;
2180   b->roworiented = PETSC_TRUE;
2181 
2182   /* stuff used in block assembly */
2183   b->barray       = 0;
2184 
2185   /* stuff used for matrix vector multiply */
2186   b->lvec         = 0;
2187   b->Mvctx        = 0;
2188 
2189   /* stuff for MatGetRow() */
2190   b->rowindices   = 0;
2191   b->rowvalues    = 0;
2192   b->getrowactive = PETSC_FALSE;
2193 
2194   /* hash table stuff */
2195   b->ht           = 0;
2196   b->hd           = 0;
2197   b->ht_size      = 0;
2198   b->ht_flag      = PETSC_FALSE;
2199   b->ht_fact      = 0;
2200   b->ht_total_ct  = 0;
2201   b->ht_insert_ct = 0;
2202 
2203   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2204     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2205     if (flg) {
2206       PetscReal fact = 1.39;
2207       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2208       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2209       if (fact <= 1.0) fact = 1.39;
2210       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2211       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2212     }
2213   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2214 
2215   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2216                                      "MatStoreValues_MPIBAIJ",
2217                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2218   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2219                                      "MatRetrieveValues_MPIBAIJ",
2220                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2221   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2222                                      "MatGetDiagonalBlock_MPIBAIJ",
2223                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2224   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2225                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2226                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2227   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2228 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2229 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2230   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2231                                      "MatDiagonalScaleLocal_MPIBAIJ",
2232                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2233   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2234                                      "MatSetHashTableFactor_MPIBAIJ",
2235                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2236   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2237   PetscFunctionReturn(0);
2238 }
2239 EXTERN_C_END
2240 
2241 /*MC
2242    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2243 
2244    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2245    and MATMPIBAIJ otherwise.
2246 
2247    Options Database Keys:
2248 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2249 
2250   Level: beginner
2251 
2252 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2253 M*/
2254 
2255 EXTERN_C_BEGIN
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatCreate_BAIJ"
2258 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2259 {
2260   PetscErrorCode ierr;
2261   PetscMPIInt    size;
2262 
2263   PetscFunctionBegin;
2264   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2265   if (size == 1) {
2266     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2267   } else {
2268     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2269   }
2270   PetscFunctionReturn(0);
2271 }
2272 EXTERN_C_END
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2276 /*@C
2277    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2278    (block compressed row).  For good matrix assembly performance
2279    the user should preallocate the matrix storage by setting the parameters
2280    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2281    performance can be increased by more than a factor of 50.
2282 
2283    Collective on Mat
2284 
2285    Input Parameters:
2286 +  A - the matrix
2287 .  bs   - size of blockk
2288 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2289            submatrix  (same for all local rows)
2290 .  d_nnz - array containing the number of block nonzeros in the various block rows
2291            of the in diagonal portion of the local (possibly different for each block
2292            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2293 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2294            submatrix (same for all local rows).
2295 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2296            off-diagonal portion of the local submatrix (possibly different for
2297            each block row) or PETSC_NULL.
2298 
2299    If the *_nnz parameter is given then the *_nz parameter is ignored
2300 
2301    Options Database Keys:
2302 +   -mat_block_size - size of the blocks to use
2303 -   -mat_use_hash_table <fact>
2304 
2305    Notes:
2306    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2307    than it must be used on all processors that share the object for that argument.
2308 
2309    Storage Information:
2310    For a square global matrix we define each processor's diagonal portion
2311    to be its local rows and the corresponding columns (a square submatrix);
2312    each processor's off-diagonal portion encompasses the remainder of the
2313    local matrix (a rectangular submatrix).
2314 
2315    The user can specify preallocated storage for the diagonal part of
2316    the local submatrix with either d_nz or d_nnz (not both).  Set
2317    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2318    memory allocation.  Likewise, specify preallocated storage for the
2319    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2320 
2321    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2322    the figure below we depict these three local rows and all columns (0-11).
2323 
2324 .vb
2325            0 1 2 3 4 5 6 7 8 9 10 11
2326           -------------------
2327    row 3  |  o o o d d d o o o o o o
2328    row 4  |  o o o d d d o o o o o o
2329    row 5  |  o o o d d d o o o o o o
2330           -------------------
2331 .ve
2332 
2333    Thus, any entries in the d locations are stored in the d (diagonal)
2334    submatrix, and any entries in the o locations are stored in the
2335    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2336    stored simply in the MATSEQBAIJ format for compressed row storage.
2337 
2338    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2339    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2340    In general, for PDE problems in which most nonzeros are near the diagonal,
2341    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2342    or you will get TERRIBLE performance; see the users' manual chapter on
2343    matrices.
2344 
2345    You can call MatGetInfo() to get information on how effective the preallocation was;
2346    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2347    You can also run with the option -info and look for messages with the string
2348    malloc in them to see if additional memory allocation was needed.
2349 
2350    Level: intermediate
2351 
2352 .keywords: matrix, block, aij, compressed row, sparse, parallel
2353 
2354 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2355 @*/
2356 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2357 {
2358   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2359 
2360   PetscFunctionBegin;
2361   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2362   if (f) {
2363     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2364   }
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatCreateMPIBAIJ"
2370 /*@C
2371    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2372    (block compressed row).  For good matrix assembly performance
2373    the user should preallocate the matrix storage by setting the parameters
2374    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2375    performance can be increased by more than a factor of 50.
2376 
2377    Collective on MPI_Comm
2378 
2379    Input Parameters:
2380 +  comm - MPI communicator
2381 .  bs   - size of blockk
2382 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2383            This value should be the same as the local size used in creating the
2384            y vector for the matrix-vector product y = Ax.
2385 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2386            This value should be the same as the local size used in creating the
2387            x vector for the matrix-vector product y = Ax.
2388 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2389 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2390 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2391            submatrix  (same for all local rows)
2392 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2393            of the in diagonal portion of the local (possibly different for each block
2394            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2395 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2396            submatrix (same for all local rows).
2397 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2398            off-diagonal portion of the local submatrix (possibly different for
2399            each block row) or PETSC_NULL.
2400 
2401    Output Parameter:
2402 .  A - the matrix
2403 
2404    Options Database Keys:
2405 +   -mat_block_size - size of the blocks to use
2406 -   -mat_use_hash_table <fact>
2407 
2408    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2409    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
2410    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2411    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2412 
2413    Notes:
2414    If the *_nnz parameter is given then the *_nz parameter is ignored
2415 
2416    A nonzero block is any block that as 1 or more nonzeros in it
2417 
2418    The user MUST specify either the local or global matrix dimensions
2419    (possibly both).
2420 
2421    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2422    than it must be used on all processors that share the object for that argument.
2423 
2424    Storage Information:
2425    For a square global matrix we define each processor's diagonal portion
2426    to be its local rows and the corresponding columns (a square submatrix);
2427    each processor's off-diagonal portion encompasses the remainder of the
2428    local matrix (a rectangular submatrix).
2429 
2430    The user can specify preallocated storage for the diagonal part of
2431    the local submatrix with either d_nz or d_nnz (not both).  Set
2432    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2433    memory allocation.  Likewise, specify preallocated storage for the
2434    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2435 
2436    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2437    the figure below we depict these three local rows and all columns (0-11).
2438 
2439 .vb
2440            0 1 2 3 4 5 6 7 8 9 10 11
2441           -------------------
2442    row 3  |  o o o d d d o o o o o o
2443    row 4  |  o o o d d d o o o o o o
2444    row 5  |  o o o d d d o o o o o o
2445           -------------------
2446 .ve
2447 
2448    Thus, any entries in the d locations are stored in the d (diagonal)
2449    submatrix, and any entries in the o locations are stored in the
2450    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2451    stored simply in the MATSEQBAIJ format for compressed row storage.
2452 
2453    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2454    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2455    In general, for PDE problems in which most nonzeros are near the diagonal,
2456    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2457    or you will get TERRIBLE performance; see the users' manual chapter on
2458    matrices.
2459 
2460    Level: intermediate
2461 
2462 .keywords: matrix, block, aij, compressed row, sparse, parallel
2463 
2464 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2465 @*/
2466 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2467 {
2468   PetscErrorCode ierr;
2469   PetscMPIInt    size;
2470 
2471   PetscFunctionBegin;
2472   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2473   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2474   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2475   if (size > 1) {
2476     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2477     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2478   } else {
2479     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2480     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2481   }
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2487 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2488 {
2489   Mat            mat;
2490   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2491   PetscErrorCode ierr;
2492   PetscInt       len=0;
2493 
2494   PetscFunctionBegin;
2495   *newmat       = 0;
2496   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2497   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2498   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2499   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2500 
2501   mat->factor       = matin->factor;
2502   mat->preallocated = PETSC_TRUE;
2503   mat->assembled    = PETSC_TRUE;
2504   mat->insertmode   = NOT_SET_VALUES;
2505 
2506   a      = (Mat_MPIBAIJ*)mat->data;
2507   mat->rmap->bs  = matin->rmap->bs;
2508   a->bs2   = oldmat->bs2;
2509   a->mbs   = oldmat->mbs;
2510   a->nbs   = oldmat->nbs;
2511   a->Mbs   = oldmat->Mbs;
2512   a->Nbs   = oldmat->Nbs;
2513 
2514   ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2515   ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2516 
2517   a->size         = oldmat->size;
2518   a->rank         = oldmat->rank;
2519   a->donotstash   = oldmat->donotstash;
2520   a->roworiented  = oldmat->roworiented;
2521   a->rowindices   = 0;
2522   a->rowvalues    = 0;
2523   a->getrowactive = PETSC_FALSE;
2524   a->barray       = 0;
2525   a->rstartbs     = oldmat->rstartbs;
2526   a->rendbs       = oldmat->rendbs;
2527   a->cstartbs     = oldmat->cstartbs;
2528   a->cendbs       = oldmat->cendbs;
2529 
2530   /* hash table stuff */
2531   a->ht           = 0;
2532   a->hd           = 0;
2533   a->ht_size      = 0;
2534   a->ht_flag      = oldmat->ht_flag;
2535   a->ht_fact      = oldmat->ht_fact;
2536   a->ht_total_ct  = 0;
2537   a->ht_insert_ct = 0;
2538 
2539   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2540   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2541   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2542   if (oldmat->colmap) {
2543 #if defined (PETSC_USE_CTABLE)
2544   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2545 #else
2546   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2547   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2548   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2549 #endif
2550   } else a->colmap = 0;
2551 
2552   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2553     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2554     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2555     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2556   } else a->garray = 0;
2557 
2558   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2559   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2560   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2561   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2562 
2563   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2564   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2565   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2566   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2567   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2568   *newmat = mat;
2569 
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 #include "petscsys.h"
2574 
2575 #undef __FUNCT__
2576 #define __FUNCT__ "MatLoad_MPIBAIJ"
2577 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2578 {
2579   Mat            A;
2580   PetscErrorCode ierr;
2581   int            fd;
2582   PetscInt       i,nz,j,rstart,rend;
2583   PetscScalar    *vals,*buf;
2584   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2585   MPI_Status     status;
2586   PetscMPIInt    rank,size,maxnz;
2587   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2588   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2589   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2590   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2591   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2592   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2593 
2594   PetscFunctionBegin;
2595   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
2596     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2597   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2598 
2599   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2600   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2601   if (!rank) {
2602     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2603     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2604     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2605   }
2606 
2607   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2608   M = header[1]; N = header[2];
2609 
2610   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2611 
2612   /*
2613      This code adds extra rows to make sure the number of rows is
2614      divisible by the blocksize
2615   */
2616   Mbs        = M/bs;
2617   extra_rows = bs - M + bs*Mbs;
2618   if (extra_rows == bs) extra_rows = 0;
2619   else                  Mbs++;
2620   if (extra_rows && !rank) {
2621     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2622   }
2623 
2624   /* determine ownership of all rows */
2625   mbs        = Mbs/size + ((Mbs % size) > rank);
2626   m          = mbs*bs;
2627   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2628   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2629 
2630   /* process 0 needs enough room for process with most rows */
2631   if (!rank) {
2632     mmax = rowners[1];
2633     for (i=2; i<size; i++) {
2634       mmax = PetscMax(mmax,rowners[i]);
2635     }
2636     mmax*=bs;
2637   } else mmax = m;
2638 
2639   rowners[0] = 0;
2640   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2641   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2642   rstart = rowners[rank];
2643   rend   = rowners[rank+1];
2644 
2645   /* distribute row lengths to all processors */
2646   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2647   if (!rank) {
2648     mend = m;
2649     if (size == 1) mend = mend - extra_rows;
2650     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2651     for (j=mend; j<m; j++) locrowlens[j] = 1;
2652     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2653     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2654     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2655     for (j=0; j<m; j++) {
2656       procsnz[0] += locrowlens[j];
2657     }
2658     for (i=1; i<size; i++) {
2659       mend = browners[i+1] - browners[i];
2660       if (i == size-1) mend = mend - extra_rows;
2661       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2662       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2663       /* calculate the number of nonzeros on each processor */
2664       for (j=0; j<browners[i+1]-browners[i]; j++) {
2665         procsnz[i] += rowlengths[j];
2666       }
2667       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2668     }
2669     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2670   } else {
2671     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2672   }
2673 
2674   if (!rank) {
2675     /* determine max buffer needed and allocate it */
2676     maxnz = procsnz[0];
2677     for (i=1; i<size; i++) {
2678       maxnz = PetscMax(maxnz,procsnz[i]);
2679     }
2680     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2681 
2682     /* read in my part of the matrix column indices  */
2683     nz     = procsnz[0];
2684     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2685     mycols = ibuf;
2686     if (size == 1)  nz -= extra_rows;
2687     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2688     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2689 
2690     /* read in every ones (except the last) and ship off */
2691     for (i=1; i<size-1; i++) {
2692       nz   = procsnz[i];
2693       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2694       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2695     }
2696     /* read in the stuff for the last proc */
2697     if (size != 1) {
2698       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2699       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2700       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2701       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2702     }
2703     ierr = PetscFree(cols);CHKERRQ(ierr);
2704   } else {
2705     /* determine buffer space needed for message */
2706     nz = 0;
2707     for (i=0; i<m; i++) {
2708       nz += locrowlens[i];
2709     }
2710     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2711     mycols = ibuf;
2712     /* receive message of column indices*/
2713     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2714     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2715     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2716   }
2717 
2718   /* loop over local rows, determining number of off diagonal entries */
2719   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2720   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2721   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2722   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2723   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2724   rowcount = 0; nzcount = 0;
2725   for (i=0; i<mbs; i++) {
2726     dcount  = 0;
2727     odcount = 0;
2728     for (j=0; j<bs; j++) {
2729       kmax = locrowlens[rowcount];
2730       for (k=0; k<kmax; k++) {
2731         tmp = mycols[nzcount++]/bs;
2732         if (!mask[tmp]) {
2733           mask[tmp] = 1;
2734           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2735           else masked1[dcount++] = tmp;
2736         }
2737       }
2738       rowcount++;
2739     }
2740 
2741     dlens[i]  = dcount;
2742     odlens[i] = odcount;
2743 
2744     /* zero out the mask elements we set */
2745     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2746     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2747   }
2748 
2749   /* create our matrix */
2750   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2751   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2752   ierr = MatSetType(A,type);CHKERRQ(ierr)
2753   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2754 
2755   if (!rank) {
2756     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2757     /* read in my part of the matrix numerical values  */
2758     nz = procsnz[0];
2759     vals = buf;
2760     mycols = ibuf;
2761     if (size == 1)  nz -= extra_rows;
2762     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2763     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2764 
2765     /* insert into matrix */
2766     jj      = rstart*bs;
2767     for (i=0; i<m; i++) {
2768       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2769       mycols += locrowlens[i];
2770       vals   += locrowlens[i];
2771       jj++;
2772     }
2773     /* read in other processors (except the last one) and ship out */
2774     for (i=1; i<size-1; i++) {
2775       nz   = procsnz[i];
2776       vals = buf;
2777       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2778       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2779     }
2780     /* the last proc */
2781     if (size != 1){
2782       nz   = procsnz[i] - extra_rows;
2783       vals = buf;
2784       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2785       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2786       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2787     }
2788     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2789   } else {
2790     /* receive numeric values */
2791     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2792 
2793     /* receive message of values*/
2794     vals   = buf;
2795     mycols = ibuf;
2796     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2797     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2798     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2799 
2800     /* insert into matrix */
2801     jj      = rstart*bs;
2802     for (i=0; i<m; i++) {
2803       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2804       mycols += locrowlens[i];
2805       vals   += locrowlens[i];
2806       jj++;
2807     }
2808   }
2809   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2810   ierr = PetscFree(buf);CHKERRQ(ierr);
2811   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2812   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2813   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2814   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2815   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2816   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2817 
2818   *newmat = A;
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 #undef __FUNCT__
2823 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2824 /*@
2825    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2826 
2827    Input Parameters:
2828 .  mat  - the matrix
2829 .  fact - factor
2830 
2831    Collective on Mat
2832 
2833    Level: advanced
2834 
2835   Notes:
2836    This can also be set by the command line option: -mat_use_hash_table <fact>
2837 
2838 .keywords: matrix, hashtable, factor, HT
2839 
2840 .seealso: MatSetOption()
2841 @*/
2842 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2843 {
2844   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2845 
2846   PetscFunctionBegin;
2847   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2848   if (f) {
2849     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2850   }
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 EXTERN_C_BEGIN
2855 #undef __FUNCT__
2856 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2857 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2858 {
2859   Mat_MPIBAIJ *baij;
2860 
2861   PetscFunctionBegin;
2862   baij = (Mat_MPIBAIJ*)mat->data;
2863   baij->ht_fact = fact;
2864   PetscFunctionReturn(0);
2865 }
2866 EXTERN_C_END
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2870 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2871 {
2872   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2873   PetscFunctionBegin;
2874   *Ad     = a->A;
2875   *Ao     = a->B;
2876   *colmap = a->garray;
2877   PetscFunctionReturn(0);
2878 }
2879 
2880 /*
2881     Special version for direct calls from Fortran (to eliminate two function call overheads
2882 */
2883 #if defined(PETSC_HAVE_FORTRAN_CAPS)
2884 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
2885 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
2886 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
2887 #endif
2888 
2889 #undef __FUNCT__
2890 #define __FUNCT__ "matmpibiajsetvaluesblocked"
2891 /*@C
2892   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
2893 
2894   Collective on Mat
2895 
2896   Input Parameters:
2897 + mat - the matrix
2898 . min - number of input rows
2899 . im - input rows
2900 . nin - number of input columns
2901 . in - input columns
2902 . v - numerical values input
2903 - addvin - INSERT_VALUES or ADD_VALUES
2904 
2905   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
2906 
2907   Level: advanced
2908 
2909 .seealso:   MatSetValuesBlocked()
2910 @*/
2911 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
2912 {
2913   /* convert input arguments to C version */
2914   Mat             mat = *matin;
2915   PetscInt        m = *min, n = *nin;
2916   InsertMode      addv = *addvin;
2917 
2918   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
2919   const MatScalar *value;
2920   MatScalar       *barray=baij->barray;
2921   PetscTruth      roworiented = baij->roworiented;
2922   PetscErrorCode  ierr;
2923   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
2924   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
2925   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
2926 
2927   PetscFunctionBegin;
2928   /* tasks normally handled by MatSetValuesBlocked() */
2929   if (mat->insertmode == NOT_SET_VALUES) {
2930     mat->insertmode = addv;
2931   }
2932 #if defined(PETSC_USE_DEBUG)
2933   else if (mat->insertmode != addv) {
2934     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
2935   }
2936   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2937 #endif
2938   if (mat->assembled) {
2939     mat->was_assembled = PETSC_TRUE;
2940     mat->assembled     = PETSC_FALSE;
2941   }
2942   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
2943 
2944 
2945   if(!barray) {
2946     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
2947     baij->barray = barray;
2948   }
2949 
2950   if (roworiented) {
2951     stepval = (n-1)*bs;
2952   } else {
2953     stepval = (m-1)*bs;
2954   }
2955   for (i=0; i<m; i++) {
2956     if (im[i] < 0) continue;
2957 #if defined(PETSC_USE_DEBUG)
2958     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
2959 #endif
2960     if (im[i] >= rstart && im[i] < rend) {
2961       row = im[i] - rstart;
2962       for (j=0; j<n; j++) {
2963         /* If NumCol = 1 then a copy is not required */
2964         if ((roworiented) && (n == 1)) {
2965           barray = (MatScalar*)v + i*bs2;
2966         } else if((!roworiented) && (m == 1)) {
2967           barray = (MatScalar*)v + j*bs2;
2968         } else { /* Here a copy is required */
2969           if (roworiented) {
2970             value = v + i*(stepval+bs)*bs + j*bs;
2971           } else {
2972             value = v + j*(stepval+bs)*bs + i*bs;
2973           }
2974           for (ii=0; ii<bs; ii++,value+=stepval) {
2975             for (jj=0; jj<bs; jj++) {
2976               *barray++  = *value++;
2977             }
2978           }
2979           barray -=bs2;
2980         }
2981 
2982         if (in[j] >= cstart && in[j] < cend){
2983           col  = in[j] - cstart;
2984           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
2985         }
2986         else if (in[j] < 0) continue;
2987 #if defined(PETSC_USE_DEBUG)
2988         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
2989 #endif
2990         else {
2991           if (mat->was_assembled) {
2992             if (!baij->colmap) {
2993               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2994             }
2995 
2996 #if defined(PETSC_USE_DEBUG)
2997 #if defined (PETSC_USE_CTABLE)
2998             { PetscInt data;
2999               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3000               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3001             }
3002 #else
3003             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3004 #endif
3005 #endif
3006 #if defined (PETSC_USE_CTABLE)
3007 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3008             col  = (col - 1)/bs;
3009 #else
3010             col = (baij->colmap[in[j]] - 1)/bs;
3011 #endif
3012             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3013               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3014               col =  in[j];
3015             }
3016           }
3017           else col = in[j];
3018           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3019         }
3020       }
3021     } else {
3022       if (!baij->donotstash) {
3023         if (roworiented) {
3024           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3025         } else {
3026           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3027         }
3028       }
3029     }
3030   }
3031 
3032   /* task normally handled by MatSetValuesBlocked() */
3033   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3034   PetscFunctionReturn(0);
3035 }
3036