#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
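
/*
   Usage sketch (illustrative only, not part of this file; helper name is
   hypothetical): MatGetRowMaxAbs() dispatches to the routine above for
   MPIBAIJ; v receives the largest absolute entry of each local row and the
   optional idx[] receives the global column of that entry.
*/
#if 0
static PetscErrorCode ExampleRowMaxAbs(Mat A)
{
  PetscErrorCode ierr;
  Vec            v;
  PetscInt       *idx,m;

  ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecCreateMPI(((PetscObject)A)->comm,m,PETSC_DETERMINE,&v);CHKERRQ(ierr);
  ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr); /* idx[i] is a global column index */
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(v);CHKERRQ(ierr);
  return 0;
}
#endif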

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
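
/*
   Usage sketch (illustrative only; helper name is hypothetical): the pair of
   routines above copies and restores the numerical values of both the
   diagonal (A) and off-diagonal (B) parts.  MatStoreValues() requires that
   no new nonzero locations be created afterwards.
*/
#if 0
static PetscErrorCode ExampleStoreRetrieve(Mat mat)
{
  PetscErrorCode ierr;

  ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(mat);CHKERRQ(ierr);    /* snapshot the current values */
  /* ... overwrite values with MatSetValues() plus assembly ... */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* restore the snapshot */
  return 0;
}
#endif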

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the number of block columns of the global matrix.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
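
/*
   Lookup sketch (illustrative only; helper name is hypothetical, array
   variant): colmap stores i*bs+1 for local block column i, so a global point
   column translates to a local point column in B as below, with a result of
   -1 meaning the block column is not present.
*/
#if 0
static PetscInt ExampleColmapLookup(Mat_MPIBAIJ *baij,PetscInt bs,PetscInt gcol)
{
  PetscInt lcol = baij->colmap[gcol/bs] - 1; /* a 0 entry means "absent", hence the -1 */
  if (lcol < 0) return -1;
  return lcol + gcol % bs;                   /* point offset within the block */
}
#endif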

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
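
/*
   The two macros above inline the SeqBAIJ insertion path: bisect on the
   sorted block columns of the row until fewer than four candidates remain,
   then scan linearly.  A standalone sketch of that search idiom
   (illustrative only; function name is hypothetical):
*/
#if 0
static PetscInt ExampleFindBlockColumn(const PetscInt rp[],PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;
  while (high-low > 3) {          /* bisection on the sorted block columns */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {      /* short linear scan finishes the search */
    if (rp[i] > bcol) break;
    if (rp[i] == bcol) return i;  /* index of the bs2-sized block */
  }
  return -1;                      /* not present: the macros insert here */
}
#endif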

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
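
/*
   Usage sketch (illustrative only; helper name is hypothetical): entries in
   locally owned rows are routed above into A (diagonal part) or B
   (off-diagonal part); rows owned elsewhere are stashed and delivered to
   their owners during assembly.
*/
#if 0
static PetscErrorCode ExampleSetValue(Mat mat,PetscInt grow,PetscInt gcol,PetscScalar val)
{
  PetscErrorCode ierr;

  ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* flushes the stash */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif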

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is already contiguous, so no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj]  = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
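
/*
   Usage sketch (illustrative only; helper name is hypothetical):
   MatSetValuesBlocked() takes BLOCK row and column indices together with a
   dense bs*bs array of values per block, which the routine above copies into
   barray when the input layout is not already contiguous.
*/
#if 0
static PetscErrorCode ExampleSetBlock(Mat mat,PetscInt brow,PetscInt bcol)
{
  PetscErrorCode ierr;
  PetscInt       bs,i;
  PetscScalar    *vals;

  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = PetscMalloc(bs*bs*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
  for (i=0; i<bs*bs; i++) vals[i] = 1.0;  /* one dense bs*bs block */
  ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  return 0;
}
#endif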

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
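
/*
   The HASH macro is multiplicative hashing: scale the key by the golden
   ratio conjugate, keep the fractional part, and spread it over the table.
   The same computation as a plain function (illustrative only; function
   name is hypothetical):
*/
#if 0
static PetscInt ExampleHash(PetscInt size,PetscInt key)
{
  PetscReal tmp = key*0.6180339887;              /* HASH_KEY */
  return (PetscInt)(size*(tmp - (PetscInt)tmp)); /* fractional part scaled to [0,size) */
}
#endif
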
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
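
/*
   Usage sketch (illustrative only; helper name is hypothetical):
   MatGetValues() on MPIBAIJ may only query locally owned rows; entries not
   stored in A or B come back as 0.0.
*/
#if 0
static PetscErrorCode ExampleGetDiagonalEntry(Mat mat,PetscScalar *val)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;

  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetValues(mat,1,&rstart,1,&rstart,val);CHKERRQ(ierr); /* first locally owned diagonal entry */
  return 0;
}
#endif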

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->cstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
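
/*
   Usage sketch (illustrative only; helper name is hypothetical): the three
   norms supported above, each finished with a single MPI_Allreduce().
*/
#if 0
static PetscErrorCode ExampleNorms(Mat mat)
{
  PetscErrorCode ierr;
  PetscReal      fro,one,inf;

  ierr = MatNorm(mat,NORM_FROBENIUS,&fro);CHKERRQ(ierr); /* sqrt of the global sum of squares */
  ierr = MatNorm(mat,NORM_1,&one);CHKERRQ(ierr);         /* max column sum */
  ierr = MatNorm(mat,NORM_INFINITY,&inf);CHKERRQ(ierr);  /* max row sum */
  return 0;
}
#endif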

/*
  Creates and populates the hash table.
  The table is created only once; if new entries need to be added to the
  matrix then the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate memory for the hash table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
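
/*
   Usage sketch (illustrative only; helper name is hypothetical): a flush
   assembly migrates stashed off-process values so insertion can continue;
   only a final assembly builds the multiply data structures and, optionally,
   the hash table above.
*/
#if 0
static PetscErrorCode ExampleAssemble(Mat mat)
{
  PetscErrorCode ierr;

  ierr = MatAssemblyBegin(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
  /* ... more MatSetValues() calls with the same InsertMode ... */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif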

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do the local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do the local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* the values were actually received in the Begin(), but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which holds for this implementation */
  /* but is perhaps not always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
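
/*
   The four multiply kernels above share one pattern: start the scatter of
   the needed off-process vector entries, apply the diagonal block A while
   the messages are in flight, finish the scatter, then apply the
   off-diagonal block B.  Usage sketch (illustrative only; helper name is
   hypothetical):
*/
#if 0
static PetscErrorCode ExampleMult(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  ierr = MatMult(A,x,y);CHKERRQ(ierr); /* dispatches to MatMult_MPIBAIJ() for MPIBAIJ */
  return 0;
}
#endif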

/*
  This only works correctly for square matrices where the subblock a->A is
  the diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
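
/*
   Usage sketch (illustrative only; helper name is hypothetical): every
   MatGetRow() must be matched by MatRestoreRow() before the next row is
   requested, since the two routines above share one work buffer.
*/
#if 0
static PetscErrorCode ExampleRowSum(Mat mat,PetscInt grow,PetscScalar *sum)
{
  PetscErrorCode    ierr;
  PetscInt          nz,i;
  const PetscInt    *cols;
  const PetscScalar *vals;

  ierr = MatGetRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr); /* grow must be locally owned */
  *sum = 0.0;
  for (i=0; i<nz; i++) *sum += vals[i];
  ierr = MatRestoreRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr);
  return 0;
}
#endif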

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
1394 
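/*
   Usage sketch (illustrative only): the five counters packed into isend[] above
   are reported locally for MAT_LOCAL, or reduced over the communicator with
   MPI_MAX or MPI_SUM for the two global flags, e.g.

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
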
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1397 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   switch (op) {
1404   case MAT_NEW_NONZERO_LOCATIONS:
1405   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1406   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1407   case MAT_KEEP_NONZERO_PATTERN:
1408   case MAT_NEW_NONZERO_LOCATION_ERR:
1409     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1410     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1411     break;
1412   case MAT_ROW_ORIENTED:
1413     a->roworiented = flg;
1414     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1415     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1416     break;
1417   case MAT_NEW_DIAGONALS:
1418     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1419     break;
1420   case MAT_IGNORE_OFF_PROC_ENTRIES:
1421     a->donotstash = flg;
1422     break;
1423   case MAT_USE_HASH_TABLE:
1424     a->ht_flag = flg;
1425     break;
1426   case MAT_SYMMETRIC:
1427   case MAT_STRUCTURALLY_SYMMETRIC:
1428   case MAT_HERMITIAN:
1429   case MAT_SYMMETRY_ETERNAL:
1430     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1431     break;
1432   default:
1433     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"Unknown option %d",op);
1434   }
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1440 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1441 {
1442   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1443   Mat_SeqBAIJ    *Aloc;
1444   Mat            B;
1445   PetscErrorCode ierr;
1446   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1447   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1448   MatScalar      *a;
1449 
1450   PetscFunctionBegin;
1451   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Matrix must be square for in-place transpose");
1452   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1453     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1454     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1455     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1456     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1457   } else {
1458     B = *matout;
1459   }
1460 
1461   /* copy over the A part */
1462   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1463   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1464   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1465 
1466   for (i=0; i<mbs; i++) {
1467     rvals[0] = bs*(baij->rstartbs + i);
1468     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1469     for (j=ai[i]; j<ai[i+1]; j++) {
1470       col = (baij->cstartbs+aj[j])*bs;
1471       for (k=0; k<bs; k++) {
1472         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1473         col++; a += bs;
1474       }
1475     }
1476   }
1477   /* copy over the B part */
1478   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1479   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1480   for (i=0; i<mbs; i++) {
1481     rvals[0] = bs*(baij->rstartbs + i);
1482     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1483     for (j=ai[i]; j<ai[i+1]; j++) {
1484       col = baij->garray[aj[j]]*bs;
1485       for (k=0; k<bs; k++) {
1486         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1487         col++; a += bs;
1488       }
1489     }
1490   }
1491   ierr = PetscFree(rvals);CHKERRQ(ierr);
1492   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1493   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1494 
1495   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1496     *matout = B;
1497   } else {
1498     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1499   }
1500   PetscFunctionReturn(0);
1501 }
1502 
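/*
   Usage sketch (illustrative only): out-of-place and in-place transposes; the
   in-place form requires a square matrix, as checked above.

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   creates At = A^T
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in-place, A must be square
*/
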
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1505 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1506 {
1507   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1508   Mat            a = baij->A,b = baij->B;
1509   PetscErrorCode ierr;
1510   PetscInt       s1,s2,s3;
1511 
1512   PetscFunctionBegin;
1513   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1514   if (rr) {
1515     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1516     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1517     /* Overlap communication with computation. */
1518     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519   }
1520   if (ll) {
1521     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1522     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1523     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1524   }
1525   /* scale the diagonal block */
1526   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1527 
1528   if (rr) {
1529     /* Do a scatter end and then right scale the off-diagonal block */
1530     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1531     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1532   }
1533 
1534   PetscFunctionReturn(0);
1535 }
1536 
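/*
   Usage sketch (illustrative only): MatDiagonalScale() computes
   mat = diag(ll)*mat*diag(rr); as coded above, the scatter of rr into the
   ghosted vector lvec overlaps the left scaling of the off-diagonal block.

      Vec l,r;                  conforming with the local row and column sizes
      ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);
      ierr = MatDiagonalScale(mat,PETSC_NULL,r);CHKERRQ(ierr);       right scaling only
*/
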
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1539 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1540 {
1541   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1542   PetscErrorCode ierr;
1543   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1544   PetscInt       i,*owners = A->rmap->range;
1545   PetscInt       *nprocs,j,idx,nsends,row;
1546   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1547   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1548   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1549   MPI_Comm       comm = ((PetscObject)A)->comm;
1550   MPI_Request    *send_waits,*recv_waits;
1551   MPI_Status     recv_status,*send_status;
1552 #if defined(PETSC_DEBUG)
1553   PetscTruth     found = PETSC_FALSE;
1554 #endif
1555 
1556   PetscFunctionBegin;
1557   /*  first count number of contributors to each processor */
1558   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1559   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1560   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1561   j = 0;
1562   for (i=0; i<N; i++) {
1563     if (lastidx > (idx = rows[i])) j = 0;
1564     lastidx = idx;
1565     for (; j<size; j++) {
1566       if (idx >= owners[j] && idx < owners[j+1]) {
1567         nprocs[2*j]++;
1568         nprocs[2*j+1] = 1;
1569         owner[i] = j;
1570 #if defined(PETSC_DEBUG)
1571         found = PETSC_TRUE;
1572 #endif
1573         break;
1574       }
1575     }
1576 #if defined(PETSC_DEBUG)
1577     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1578     found = PETSC_FALSE;
1579 #endif
1580   }
1581   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1582 
1583   /* inform other processors of number of messages and max length*/
1584   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1585 
1586   /* post receives:   */
1587   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1588   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1589   for (i=0; i<nrecvs; i++) {
1590     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1591   }
1592 
1593   /* do sends:
1594      1) starts[i] gives the starting index in svalues for stuff going to
1595      the ith processor
1596   */
1597   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1598   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1599   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1600   starts[0]  = 0;
1601   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1602   for (i=0; i<N; i++) {
1603     svalues[starts[owner[i]]++] = rows[i];
1604   }
1605 
1606   starts[0] = 0;
1607   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1608   count = 0;
1609   for (i=0; i<size; i++) {
1610     if (nprocs[2*i+1]) {
1611       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1612     }
1613   }
1614   ierr = PetscFree(starts);CHKERRQ(ierr);
1615 
1616   base = owners[rank];
1617 
1618   /*  wait on receives */
1619   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1620   count  = nrecvs;
1621   slen = 0;
1622   while (count) {
1623     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1624     /* unpack receives into our local space */
1625     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1626     source[imdex]  = recv_status.MPI_SOURCE;
1627     lens[imdex]    = n;
1628     slen          += n;
1629     count--;
1630   }
1631   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1632 
1633   /* move the data into the send scatter */
1634   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1635   count = 0;
1636   for (i=0; i<nrecvs; i++) {
1637     values = rvalues + i*nmax;
1638     for (j=0; j<lens[i]; j++) {
1639       lrows[count++] = values[j] - base;
1640     }
1641   }
1642   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1643   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1644   ierr = PetscFree(owner);CHKERRQ(ierr);
1645   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1646 
1647   /* actually zap the local rows */
1648   /*
1649      Zero the required rows. If the "diagonal block" of the matrix
1650      is square and the user wishes to set the diagonal, we use separate
1651      code so that MatSetValues() is not called for each diagonal entry,
1652      which would allocate new memory each time and slow things down.
1653   */
1654   /* must zero l->B before l->A because the (diag != 0.0) case below may put values into l->B */
1656   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1657   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1658     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1659   } else if (diag != 0.0) {
1660     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1661     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1662        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1663     for (i=0; i<slen; i++) {
1664       row  = lrows[i] + rstart_bs;
1665       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1666     }
1667     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   } else {
1670     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1671   }
1672 
1673   ierr = PetscFree(lrows);CHKERRQ(ierr);
1674 
1675   /* wait on sends */
1676   if (nsends) {
1677     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1678     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1679     ierr = PetscFree(send_status);CHKERRQ(ierr);
1680   }
1681   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1682   ierr = PetscFree(svalues);CHKERRQ(ierr);
1683 
1684   PetscFunctionReturn(0);
1685 }
1686 
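/*
   Usage sketch (illustrative only): the rows are global indices and may be
   owned by any process; they are routed to their owners by the sends/receives
   above, and a nonzero diag is placed on the diagonal of each zeroed row.

      PetscInt    rows[] = {0,5};                 hypothetical global row numbers
      PetscScalar diag   = 1.0;
      ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
*/
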
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1689 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1690 {
1691   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1692   PetscErrorCode ierr;
1693 
1694   PetscFunctionBegin;
1695   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatEqual_MPIBAIJ"
1703 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1704 {
1705   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1706   Mat            a,b,c,d;
1707   PetscTruth     flg;
1708   PetscErrorCode ierr;
1709 
1710   PetscFunctionBegin;
1711   a = matA->A; b = matA->B;
1712   c = matB->A; d = matB->B;
1713 
1714   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1715   if (flg) {
1716     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1717   }
1718   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatCopy_MPIBAIJ"
1724 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1725 {
1726   PetscErrorCode ierr;
1727   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1728   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1729 
1730   PetscFunctionBegin;
1731   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1732   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1733     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1734   } else {
1735     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1736     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1737   }
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1743 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1744 {
1745   PetscErrorCode ierr;
1746 
1747   PetscFunctionBegin;
1748   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 #undef __FUNCT__
1753 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1754 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1755 {
1756   PetscErrorCode ierr;
1757   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1758   PetscBLASInt   bnz,one=1;
1759   Mat_SeqBAIJ    *x,*y;
1760 
1761   PetscFunctionBegin;
1762   if (str == SAME_NONZERO_PATTERN) {
1763     PetscScalar alpha = a;
1764     x = (Mat_SeqBAIJ *)xx->A->data;
1765     y = (Mat_SeqBAIJ *)yy->A->data;
1766     bnz = PetscBLASIntCast(x->nz);
1767     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1768     x = (Mat_SeqBAIJ *)xx->B->data;
1769     y = (Mat_SeqBAIJ *)yy->B->data;
1770     bnz = PetscBLASIntCast(x->nz);
1771     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1772   } else {
1773     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1774   }
1775   PetscFunctionReturn(0);
1776 }
1777 
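/*
   Usage sketch (illustrative only): when the caller can promise that X and Y
   have the same nonzero pattern, Y = Y + a*X above reduces to two BLAS axpy
   calls on the stored values of the diagonal and off-diagonal blocks.

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   falls back to MatAXPY_Basic()
*/
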
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1780 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1781 {
1782   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1783   PetscInt rbs,cbs;
1784   PetscErrorCode ierr;
1785 
1786   PetscFunctionBegin;
1787   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1788   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1789   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1790   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1791   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D incompatible with row layout block size %D",bs,rbs);
1792   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D incompatible with column layout block size %D",bs,cbs);
1793   PetscFunctionReturn(0);
1794 }
1795 
1796 #undef __FUNCT__
1797 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1798 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1799 {
1800   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1801   PetscErrorCode ierr;
1802 
1803   PetscFunctionBegin;
1804   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1805   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1811 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1812 {
1813   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1814   PetscErrorCode ierr;
1815 
1816   PetscFunctionBegin;
1817   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1818   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1819   PetscFunctionReturn(0);
1820 }
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1824 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1825 {
1826   PetscErrorCode ierr;
1827   IS             iscol_local;
1828   PetscInt       csize;
1829 
1830   PetscFunctionBegin;
1831   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1832   if (call == MAT_REUSE_MATRIX) {
1833     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1834     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1835   } else {
1836     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1837   }
1838   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1839   if (call == MAT_INITIAL_MATRIX) {
1840     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1841     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1842   }
1843   PetscFunctionReturn(0);
1844 }
1845 
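/*
   Usage sketch (illustrative only): iscol is gathered onto every process with
   ISAllGather() and cached on the new matrix under the name "ISAllGather" so
   that a later MAT_REUSE_MATRIX call can find it again.

      IS  isrow,iscol;          index sets of global rows and columns to extract
      Mat S;
      ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ...                       change values in mat, keeping the same pattern
      ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/
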
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
1848 /*
1849     Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
1850   and then the end result by concatenating the local matrices.
1851   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
1852 */
1853 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1854 {
1855   PetscErrorCode ierr;
1856   PetscMPIInt    rank,size;
1857   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1858   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1859   Mat            *local,M,Mreuse;
1860   MatScalar      *vwork,*aa;
1861   MPI_Comm       comm = ((PetscObject)mat)->comm;
1862   Mat_SeqBAIJ    *aij;
1863 
1864 
1865   PetscFunctionBegin;
1866   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1867   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1868 
1869   if (call ==  MAT_REUSE_MATRIX) {
1870     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1871     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1872     local = &Mreuse;
1873     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1874   } else {
1875     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1876     Mreuse = *local;
1877     ierr   = PetscFree(local);CHKERRQ(ierr);
1878   }
1879 
1880   /*
1881       m - number of local rows
1882       n - number of columns (same on all processors)
1883       rstart - first row in new global matrix generated
1884   */
1885   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1886   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1887   m    = m/bs;
1888   n    = n/bs;
1889 
1890   if (call == MAT_INITIAL_MATRIX) {
1891     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1892     ii  = aij->i;
1893     jj  = aij->j;
1894 
1895     /*
1896         Determine the number of non-zeros in the diagonal and off-diagonal
1897         portions of the matrix in order to do correct preallocation
1898     */
1899 
1900     /* first get start and end of "diagonal" columns */
1901     if (csize == PETSC_DECIDE) {
1902       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1903       if (mglobal == n*bs) { /* square matrix */
1904         nlocal = m;
1905       } else {
1906         nlocal = n/size + ((n % size) > rank);
1907       }
1908     } else {
1909       nlocal = csize/bs;
1910     }
1911     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1912     rstart = rend - nlocal;
1913     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1914 
1915     /* next, compute all the lengths */
1916     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1917     olens = dlens + m;
1918     for (i=0; i<m; i++) {
1919       jend = ii[i+1] - ii[i];
1920       olen = 0;
1921       dlen = 0;
1922       for (j=0; j<jend; j++) {
1923         if (*jj < rstart || *jj >= rend) olen++;
1924         else dlen++;
1925         jj++;
1926       }
1927       olens[i] = olen;
1928       dlens[i] = dlen;
1929     }
1930     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1931     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1932     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1933     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1934     ierr = PetscFree(dlens);CHKERRQ(ierr);
1935   } else {
1936     PetscInt ml,nl;
1937 
1938     M = *newmat;
1939     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1940     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1941     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1942     /*
1943          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1944        rather than the slower MatSetValues().
1945     */
1946     M->was_assembled = PETSC_TRUE;
1947     M->assembled     = PETSC_FALSE;
1948   }
1949   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1950   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1951   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1952   ii  = aij->i;
1953   jj  = aij->j;
1954   aa  = aij->a;
1955   for (i=0; i<m; i++) {
1956     row   = rstart/bs + i;
1957     nz    = ii[i+1] - ii[i];
1958     cwork = jj;     jj += nz;
1959     vwork = aa;     aa += nz;
1960     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1961   }
1962 
1963   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965   *newmat = M;
1966 
1967   /* save submatrix used in processor for next request */
1968   if (call ==  MAT_INITIAL_MATRIX) {
1969     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1970     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1971   }
1972 
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 #undef __FUNCT__
1977 #define __FUNCT__ "MatPermute_MPIBAIJ"
1978 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1979 {
1980   MPI_Comm       comm,pcomm;
1981   PetscInt       first,local_size,nrows;
1982   const PetscInt *rows;
1983   PetscMPIInt    size;
1984   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1985   PetscErrorCode ierr;
1986 
1987   PetscFunctionBegin;
1988   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1989   /* make a collective version of 'rowp' */
1990   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1991   if (pcomm==comm) {
1992     crowp = rowp;
1993   } else {
1994     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1995     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1996     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1997     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1998   }
1999   /* collect the global row permutation and invert it */
2000   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2001   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2002   if (pcomm!=comm) {
2003     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2004   }
2005   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2006   /* get the local target indices */
2007   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2008   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2009   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2010   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2011   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2012   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2013   /* the column permutation is so much easier;
2014      make a local version of 'colp' and invert it */
2015   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2016   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2017   if (size==1) {
2018     lcolp = colp;
2019   } else {
2020     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2021     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2022     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2023   }
2024   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2025   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2026   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2027   if (size>1) {
2028     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2029     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2030   }
2031   /* now we just get the submatrix */
2032   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2033   /* clean up */
2034   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2035   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2036   PetscFunctionReturn(0);
2037 }
2038 
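/*
   Usage sketch (illustrative only): rowp and colp describe permutations of the
   global rows and columns; the routine above builds the permuted matrix as a
   submatrix selected with the inverted permutations.

      IS  rowp,colp;            permutation index sets, e.g. from an ordering routine
      Mat Aperm;
      ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
*/
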
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2041 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2042 {
2043   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2044   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2045 
2046   PetscFunctionBegin;
2047   if (nghosts) { *nghosts = B->nbs;}
2048   if (ghosts) {*ghosts = baij->garray;}
2049   PetscFunctionReturn(0);
2050 }
2051 
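/*
   Usage sketch (illustrative only): the ghosts returned are the global block
   column indices (garray) of the off-diagonal part, suitable for building a
   conforming ghosted vector with VecCreateGhostBlock().

      PetscInt       nghosts,bs;
      const PetscInt *ghosts;
      Vec            v;
      ierr = MatGetGhosts(mat,&nghosts,&ghosts);CHKERRQ(ierr);
      ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
      ierr = VecCreateGhostBlock(((PetscObject)mat)->comm,bs,mat->cmap->n,PETSC_DECIDE,nghosts,ghosts,&v);CHKERRQ(ierr);
*/
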
2052 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2053 
2054 #undef __FUNCT__
2055 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2056 /*
2057     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2058 */
2059 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2060 {
2061   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2062   PetscErrorCode        ierr;
2063   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2064   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2065   const PetscInt        *is;
2066   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2067   PetscInt              *rowhit,M,cstart,cend,colb;
2068   PetscInt              *columnsforrow,l;
2069   IS                    *isa;
2070   PetscTruth             done,flg;
2071   ISLocalToGlobalMapping map = mat->bmapping;
2072   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype = c->ctype;
2073 
2074   PetscFunctionBegin;
2075   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2076   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2077 
2078   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2079   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2080   M                = mat->rmap->n/bs;
2081   cstart           = mat->cmap->rstart/bs;
2082   cend             = mat->cmap->rend/bs;
2083   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2084   c->N             = mat->cmap->N/bs;
2085   c->m             = mat->rmap->n/bs;
2086   c->rstart        = mat->rmap->rstart/bs;
2087 
2088   c->ncolors       = nis;
2089   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2090   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2091   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2092   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2093   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2094   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2095 
2096   /* Allow access to data structures of local part of matrix */
2097   if (!baij->colmap) {
2098     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2099   }
2100   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2101   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2102 
2103   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2104   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2105 
2106   for (i=0; i<nis; i++) {
2107     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2108     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2109     c->ncolumns[i] = n;
2110     if (n) {
2111       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2112       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2113       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2114     } else {
2115       c->columns[i]  = 0;
2116     }
2117 
2118     if (ctype == IS_COLORING_GLOBAL){
2119       /* Determine the total (parallel) number of columns of this color */
2120       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2121       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2122 
2123       nn   = PetscMPIIntCast(n);
2124       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2125       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2126       if (!nctot) {
2127         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2128       }
2129 
2130       disp[0] = 0;
2131       for (j=1; j<size; j++) {
2132         disp[j] = disp[j-1] + ncolsonproc[j-1];
2133       }
2134 
2135       /* Get complete list of columns for color on each processor */
2136       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2137       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2138       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2139     } else if (ctype == IS_COLORING_GHOSTED){
2140       /* Determine local number of columns of this color on this process, including ghost points */
2141       nctot = n;
2142       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2143       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2144     } else {
2145       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2146     }
2147 
2148     /*
2149        Mark all rows affected by these columns
2150     */
2151     /* Temporary option to allow for debugging/testing */
2152     flg  = PETSC_FALSE;
2153     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2154     if (!flg) {/*-----------------------------------------------------------------------------*/
2155       /* crude, fast version */
2156       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2157       /* loop over columns*/
2158       for (j=0; j<nctot; j++) {
2159         if (ctype == IS_COLORING_GHOSTED) {
2160           col = ltog[cols[j]];
2161         } else {
2162           col  = cols[j];
2163         }
2164         if (col >= cstart && col < cend) {
2165           /* column is in diagonal block of matrix */
2166           rows = A_cj + A_ci[col-cstart];
2167           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2168         } else {
2169 #if defined (PETSC_USE_CTABLE)
2170           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2171           colb--;
2172 #else
2173           colb = baij->colmap[col] - 1;
2174 #endif
2175           if (colb == -1) {
2176             m = 0;
2177           } else {
2178             colb = colb/bs;
2179             rows = B_cj + B_ci[colb];
2180             m    = B_ci[colb+1] - B_ci[colb];
2181           }
2182         }
2183         /* loop over columns marking them in rowhit */
2184         for (k=0; k<m; k++) {
2185           rowhit[*rows++] = col + 1;
2186         }
2187       }
2188 
2189       /* count the number of hits */
2190       nrows = 0;
2191       for (j=0; j<M; j++) {
2192         if (rowhit[j]) nrows++;
2193       }
2194       c->nrows[i]         = nrows;
2195       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2196       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2197       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2198       nrows = 0;
2199       for (j=0; j<M; j++) {
2200         if (rowhit[j]) {
2201           c->rows[i][nrows]           = j;
2202           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2203           nrows++;
2204         }
2205       }
2206     } else {/*-------------------------------------------------------------------------------*/
2207       /* slow version, using rowhit as a linked list */
2208       PetscInt currentcol,fm,mfm;
2209       rowhit[M] = M;
2210       nrows     = 0;
2211       /* loop over columns*/
2212       for (j=0; j<nctot; j++) {
2213         if (ctype == IS_COLORING_GHOSTED) {
2214           col = ltog[cols[j]];
2215         } else {
2216           col  = cols[j];
2217         }
2218         if (col >= cstart && col < cend) {
2219           /* column is in diagonal block of matrix */
2220           rows = A_cj + A_ci[col-cstart];
2221           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2222         } else {
2223 #if defined (PETSC_USE_CTABLE)
2224           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2225           colb--;
2226 #else
2227           colb = baij->colmap[col] - 1;
2228 #endif
2229           if (colb == -1) {
2230             m = 0;
2231           } else {
2232             colb = colb/bs;
2233             rows = B_cj + B_ci[colb];
2234             m    = B_ci[colb+1] - B_ci[colb];
2235           }
2236         }
2237 
2238         /* loop over columns marking them in rowhit */
2239         fm    = M; /* fm points to first entry in linked list */
2240         for (k=0; k<m; k++) {
2241           currentcol = *rows++;
2242           /* is it already in the list? */
2243           do {
2244             mfm  = fm;
2245             fm   = rowhit[fm];
2246           } while (fm < currentcol);
2247           /* not in list so add it */
2248           if (fm != currentcol) {
2249             nrows++;
2250             columnsforrow[currentcol] = col;
2251             /* next three lines insert new entry into linked list */
2252             rowhit[mfm]               = currentcol;
2253             rowhit[currentcol]        = fm;
2254             fm                        = currentcol;
2255             /* fm points to present position in list since we know the columns are sorted */
2256           } else {
2257             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2258           }
2259         }
2260       }
2261       c->nrows[i]         = nrows;
2262       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2263       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2264       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2265       /* now store the linked list of rows into c->rows[i] */
2266       nrows = 0;
2267       fm    = rowhit[M];
2268       do {
2269         c->rows[i][nrows]            = fm;
2270         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2271         fm                           = rowhit[fm];
2272       } while (fm < M);
2273     } /* ---------------------------------------------------------------------------------------*/
2274     ierr = PetscFree(cols);CHKERRQ(ierr);
2275   }
2276 
2277   /* Optimize by adding the vscale and vscaleforrow[][] fields */
2278   /*
2279        vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
2280   */
2281   if (ctype == IS_COLORING_GLOBAL) {
2282     PetscInt *garray;
2283     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2284     for (i=0; i<baij->B->cmap->n/bs; i++) {
2285       for (j=0; j<bs; j++) {
2286         garray[i*bs+j] = bs*baij->garray[i]+j;
2287       }
2288     }
2289     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2290     ierr = PetscFree(garray);CHKERRQ(ierr);
2291     CHKMEMQ;
2292     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2293     for (k=0; k<c->ncolors; k++) {
2294       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2295       for (l=0; l<c->nrows[k]; l++) {
2296         col = c->columnsforrow[k][l];
2297         if (col >= cstart && col < cend) {
2298           /* column is in diagonal block of matrix */
2299           colb = col - cstart;
2300         } else {
2301           /* column  is in "off-processor" part */
2302 #if defined (PETSC_USE_CTABLE)
2303           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2304           colb--;
2305 #else
2306           colb = baij->colmap[col] - 1;
2307 #endif
2308           colb = colb/bs;
2309           colb += cend - cstart;
2310         }
2311         c->vscaleforrow[k][l] = colb;
2312       }
2313     }
2314   } else if (ctype == IS_COLORING_GHOSTED) {
2315     /* Get gtol mapping */
2316     PetscInt N = mat->cmap->N, *gtol;
2317     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2318     for (i=0; i<N; i++) gtol[i] = -1;
2319     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2320 
2321     c->vscale = 0; /* will be created in MatFDColoringApply() */
2322     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2323     for (k=0; k<c->ncolors; k++) {
2324       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2325       for (l=0; l<c->nrows[k]; l++) {
2326         col = c->columnsforrow[k][l];      /* global column index */
2327         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2328       }
2329     }
2330     ierr = PetscFree(gtol);CHKERRQ(ierr);
2331   }
2332   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2333 
2334   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2335   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2336   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2337   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2338   CHKMEMQ;
2339   PetscFunctionReturn(0);
2340 }
2341 
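/*
   Usage sketch (illustrative only): a coloring of the matrix is turned into the
   data structure built above, which MatFDColoringApply() then uses to compute a
   finite-difference Jacobian with one function evaluation per color.

      ISColoring    iscoloring;
      MatFDColoring fdcoloring;
      ierr = MatGetColoring(J,MATCOLORING_SL,&iscoloring);CHKERRQ(ierr);
      ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
      ierr = ISColoringDestroy(iscoloring);CHKERRQ(ierr);
*/
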
2342 #undef __FUNCT__
2343 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2344 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2345 {
2346   Mat            B;
2347   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2348   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2349   Mat_SeqAIJ     *b;
2350   PetscErrorCode ierr;
2351   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2352   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2353   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2354 
2355   PetscFunctionBegin;
2356   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2357   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2358 
2359   /* ----------------------------------------------------------------
2360      Tell every processor the number of nonzeros per row
2361   */
2362   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2363   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2364     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2365   }
2366   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2367   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2368   displs     = recvcounts + size;
2369   for (i=0; i<size; i++) {
2370     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2371     displs[i]     = A->rmap->range[i]/bs;
2372   }
2373 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2374   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2375 #else
2376   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2377 #endif
2378   /* ---------------------------------------------------------------
2379      Create the sequential matrix of the same type as the local block diagonal
2380   */
2381   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2382   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2383   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2384   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2385   b = (Mat_SeqAIJ *)B->data;
2386 
2387   /*--------------------------------------------------------------------
2388     Copy my part of matrix column indices over
2389   */
2390   sendcount  = ad->nz + bd->nz;
2391   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2392   a_jsendbuf = ad->j;
2393   b_jsendbuf = bd->j;
2394   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2395   cnt        = 0;
2396   for (i=0; i<n; i++) {
2397 
2398     /* put in lower diagonal portion */
2399     m = bd->i[i+1] - bd->i[i];
2400     while (m > 0) {
2401       /* is it above diagonal (in bd (compressed) numbering) */
2402       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2403       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2404       m--;
2405     }
2406 
2407     /* put in diagonal portion */
2408     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2409       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2410     }
2411 
2412     /* put in upper diagonal portion */
2413     while (m-- > 0) {
2414       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2415     }
2416   }
2417   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2418 
2419   /*--------------------------------------------------------------------
2420     Gather all column indices to all processors
2421   */
2422   for (i=0; i<size; i++) {
2423     recvcounts[i] = 0;
2424     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2425       recvcounts[i] += lens[j];
2426     }
2427   }
2428   displs[0]  = 0;
2429   for (i=1; i<size; i++) {
2430     displs[i] = displs[i-1] + recvcounts[i-1];
2431   }
2432 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2433   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2434 #else
2435   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2436 #endif
2437   /*--------------------------------------------------------------------
2438     Assemble the matrix into usable form (note the numerical values are not yet set)
2439   */
2440   /* set the b->ilen (length of each row) values */
2441   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2442   /* set the b->i indices */
2443   b->i[0] = 0;
2444   for (i=1; i<=A->rmap->N/bs; i++) {
2445     b->i[i] = b->i[i-1] + lens[i-1];
2446   }
2447   ierr = PetscFree(lens);CHKERRQ(ierr);
2448   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2449   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2450   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2451 
2452   if (A->symmetric){
2453     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2454   } else if (A->hermitian) {
2455     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2456   } else if (A->structurally_symmetric) {
2457     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2458   }
2459   *newmat = B;
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 #undef __FUNCT__
2464 #define __FUNCT__ "MatSOR_MPIBAIJ"
2465 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2466 {
2467   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2468   PetscErrorCode ierr;
2469   Vec            bb1 = 0;
2470 
2471   PetscFunctionBegin;
2472   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { /* i.e., multiple iterations or a nonzero initial guess, so bb1 is needed */
2473     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2474   }
2475 
2476   if (flag == SOR_APPLY_UPPER) {
2477     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2478     PetscFunctionReturn(0);
2479   }
2480 
2481   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2482     if (flag & SOR_ZERO_INITIAL_GUESS) {
2483       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2484       its--;
2485     }
2486 
2487     while (its--) {
2488       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2489       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2490 
2491       /* update rhs: bb1 = bb - B*x */
2492       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2493       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2494 
2495       /* local sweep */
2496       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2497     }
2498   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2499     if (flag & SOR_ZERO_INITIAL_GUESS) {
2500       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2501       its--;
2502     }
2503     while (its--) {
2504       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2505       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2506 
2507       /* update rhs: bb1 = bb - B*x */
2508       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2509       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2510 
2511       /* local sweep */
2512       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2513     }
2514   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2515     if (flag & SOR_ZERO_INITIAL_GUESS) {
2516       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2517       its--;
2518     }
2519     while (its--) {
2520       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2521       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2522 
2523       /* update rhs: bb1 = bb - B*x */
2524       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2525       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2526 
2527       /* local sweep */
2528       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2529     }
2530   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested SOR type is not supported in parallel");
2531 
2532   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2533   PetscFunctionReturn(0);
2534 }
2535 
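/*
   Usage sketch (illustrative only): only the SOR_LOCAL_* variants are supported
   in parallel; each outer iteration refreshes the ghost values and sweeps the
   diagonal block against the updated right-hand side bb1 = bb - B*x.  This is
   normally reached through PCSOR, but a direct call looks like

      ierr = MatSOR(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,0.0,1,1,x);CHKERRQ(ierr);
*/
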
2536 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2537 
2538 
2539 /* -------------------------------------------------------------------*/
2540 static struct _MatOps MatOps_Values = {
2541        MatSetValues_MPIBAIJ,
2542        MatGetRow_MPIBAIJ,
2543        MatRestoreRow_MPIBAIJ,
2544        MatMult_MPIBAIJ,
2545 /* 4*/ MatMultAdd_MPIBAIJ,
2546        MatMultTranspose_MPIBAIJ,
2547        MatMultTransposeAdd_MPIBAIJ,
2548        0,
2549        0,
2550        0,
2551 /*10*/ 0,
2552        0,
2553        0,
2554        MatSOR_MPIBAIJ,
2555        MatTranspose_MPIBAIJ,
2556 /*15*/ MatGetInfo_MPIBAIJ,
2557        MatEqual_MPIBAIJ,
2558        MatGetDiagonal_MPIBAIJ,
2559        MatDiagonalScale_MPIBAIJ,
2560        MatNorm_MPIBAIJ,
2561 /*20*/ MatAssemblyBegin_MPIBAIJ,
2562        MatAssemblyEnd_MPIBAIJ,
2563        MatSetOption_MPIBAIJ,
2564        MatZeroEntries_MPIBAIJ,
2565 /*24*/ MatZeroRows_MPIBAIJ,
2566        0,
2567        0,
2568        0,
2569        0,
2570 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2571        0,
2572        0,
2573        0,
2574        0,
2575 /*34*/ MatDuplicate_MPIBAIJ,
2576        0,
2577        0,
2578        0,
2579        0,
2580 /*39*/ MatAXPY_MPIBAIJ,
2581        MatGetSubMatrices_MPIBAIJ,
2582        MatIncreaseOverlap_MPIBAIJ,
2583        MatGetValues_MPIBAIJ,
2584        MatCopy_MPIBAIJ,
2585 /*44*/ 0,
2586        MatScale_MPIBAIJ,
2587        0,
2588        0,
2589        0,
2590 /*49*/ MatSetBlockSize_MPIBAIJ,
2591        0,
2592        0,
2593        0,
2594        0,
2595 /*54*/ MatFDColoringCreate_MPIBAIJ,
2596        0,
2597        MatSetUnfactored_MPIBAIJ,
2598        MatPermute_MPIBAIJ,
2599        MatSetValuesBlocked_MPIBAIJ,
2600 /*59*/ MatGetSubMatrix_MPIBAIJ,
2601        MatDestroy_MPIBAIJ,
2602        MatView_MPIBAIJ,
2603        0,
2604        0,
2605 /*64*/ 0,
2606        0,
2607        0,
2608        0,
2609        0,
2610 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2611        0,
2612        0,
2613        0,
2614        0,
2615 /*74*/ 0,
2616        MatFDColoringApply_BAIJ,
2617        0,
2618        0,
2619        0,
2620 /*79*/ 0,
2621        0,
2622        0,
2623        0,
2624        MatLoad_MPIBAIJ,
2625 /*84*/ 0,
2626        0,
2627        0,
2628        0,
2629        0,
2630 /*89*/ 0,
2631        0,
2632        0,
2633        0,
2634        0,
2635 /*94*/ 0,
2636        0,
2637        0,
2638        0,
2639        0,
2640 /*99*/ 0,
2641        0,
2642        0,
2643        0,
2644        0,
2645 /*104*/0,
2646        MatRealPart_MPIBAIJ,
2647        MatImaginaryPart_MPIBAIJ,
2648        0,
2649        0,
2650 /*109*/0,
2651        0,
2652        0,
2653        0,
2654        0,
2655 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2656        0,
2657        MatGetGhosts_MPIBAIJ,
2658        0,
2659        0,
2660 /*119*/0,
2661        0,
2662        0,
2663        0
2664 };
2665 
2666 EXTERN_C_BEGIN
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2669 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2670 {
2671   PetscFunctionBegin;
2672   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2673   *iscopy = PETSC_FALSE;
2674   PetscFunctionReturn(0);
2675 }
2676 EXTERN_C_END
2677 
2678 EXTERN_C_BEGIN
2679 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2680 EXTERN_C_END
2681 
2682 EXTERN_C_BEGIN
2683 #undef __FUNCT__
2684 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2685 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2686 {
2687   PetscInt       m,rstart,cstart,cend;
2688   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2689   const PetscInt *JJ=0;
2690   PetscScalar    *values=0;
2691   PetscErrorCode ierr;
2692 
2693   PetscFunctionBegin;
2694 
2695   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2696   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2697   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2698   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2699   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2700   m      = B->rmap->n/bs;
2701   rstart = B->rmap->rstart/bs;
2702   cstart = B->cmap->rstart/bs;
2703   cend   = B->cmap->rend/bs;
2704 
2705   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2706   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2707   for (i=0; i<m; i++) {
2708     nz = ii[i+1] - ii[i];
2709     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2710     nz_max = PetscMax(nz_max,nz);
2711     JJ  = jj + ii[i];
2712     for (j=0; j<nz; j++) {
2713       if (*JJ >= cstart) break;
2714       JJ++;
2715     }
2716     d = 0;
2717     for (; j<nz; j++) {
2718       if (*JJ++ >= cend) break;
2719       d++;
2720     }
2721     d_nnz[i] = d;
2722     o_nnz[i] = nz - d;
2723   }
2724   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2725   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2726 
2727   values = (PetscScalar*)V;
2728   if (!values) {
2729     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2730     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2731   }
2732   for (i=0; i<m; i++) {
2733     PetscInt          row    = i + rstart;
2734     PetscInt          ncols  = ii[i+1] - ii[i];
2735     const PetscInt    *icols = jj + ii[i];
2736     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2737     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2738   }
2739 
2740   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2741   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2742   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2743 
2744   PetscFunctionReturn(0);
2745 }
2746 EXTERN_C_END
2747 
2748 #undef __FUNCT__
2749 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2750 /*@C
2751    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ (block compressed row) format
2752 
2753    Collective on MPI_Comm
2754 
2755    Input Parameters:
2756 +  B - the matrix
2757 .  bs - the block size
2758 .  i - the indices into j for the start of each local block row (starts with zero)
2759 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2760 -  v - optional values in the matrix, bs*bs values per block
2761 
2762    Level: developer
2763 
2764 .keywords: matrix, baij, block, compressed row, sparse, parallel
2765 
2766 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValuesBlocked(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2767 @*/
2768 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2769 {
2770   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2771 
2772   PetscFunctionBegin;
2773   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2774   if (f) {
2775     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2776   }
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 EXTERN_C_BEGIN
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2784 {
2785   Mat_MPIBAIJ    *b;
2786   PetscErrorCode ierr;
2787   PetscInt       i, newbs = PetscAbs(bs);
2788 
2789   PetscFunctionBegin;
2790   if (bs < 0) {
2791     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2792       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2793     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2794     bs   = PetscAbs(bs);
2795   }
2796   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2797   bs = newbs;
2798 
2799 
2800   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2801   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2802   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2803   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2804   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2805 
2806   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2807   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2808   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2809   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2810 
2811   if (d_nnz) {
2812     for (i=0; i<B->rmap->n/bs; i++) {
2813       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2814     }
2815   }
2816   if (o_nnz) {
2817     for (i=0; i<B->rmap->n/bs; i++) {
2818       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2819     }
2820   }
2821 
2822   b = (Mat_MPIBAIJ*)B->data;
2823   b->bs2 = bs*bs;
2824   b->mbs = B->rmap->n/bs;
2825   b->nbs = B->cmap->n/bs;
2826   b->Mbs = B->rmap->N/bs;
2827   b->Nbs = B->cmap->N/bs;
2828 
2829   for (i=0; i<=b->size; i++) {
2830     b->rangebs[i] = B->rmap->range[i]/bs;
2831   }
2832   b->rstartbs = B->rmap->rstart/bs;
2833   b->rendbs   = B->rmap->rend/bs;
2834   b->cstartbs = B->cmap->rstart/bs;
2835   b->cendbs   = B->cmap->rend/bs;
2836 
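  /* the diagonal block A is a square sequential BAIJ matrix over this process's rows
     and columns; the off-diagonal block B is created with the full global column width
     and is compacted to just the referenced columns when the matrix is assembled */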
2837   if (!B->preallocated) {
2838     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2839     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2840     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2841     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2842     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2843     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2844     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2845     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2846     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2847   }
2848 
2849   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2850   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2851   B->preallocated = PETSC_TRUE;
2852   PetscFunctionReturn(0);
2853 }
2854 EXTERN_C_END
2855 
2856 EXTERN_C_BEGIN
2857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2858 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2859 EXTERN_C_END
2860 
2861 
2862 EXTERN_C_BEGIN
2863 #undef __FUNCT__
2864 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2865 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2866 {
2867   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2868   PetscErrorCode ierr;
2869   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2870   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2871   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2872 
2873   PetscFunctionBegin;
2874   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2875   ii[0] = 0;
2876   CHKMEMQ;
2877   for (i=0; i<M; i++) {
2878     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2879     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2880     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2881     /* remove one from the count if this block row contains its diagonal entry */
2882     for (j=id[i]; j<id[i+1]; j++) {
2883       if (jd[j] == i) {ii[i+1]--;break;}
2884     }
2885     CHKMEMQ;
2886   }
2887   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2888   cnt = 0;
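  /* merge each row in ascending global block-column order: first the off-diagonal
     columns that precede this process's block-row range, then the diagonal block's
     columns (omitting the diagonal entry, which is not stored in the MPIAdj format),
     then the remaining off-diagonal columns */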
2889   for (i=0; i<M; i++) {
2890     for (j=io[i]; j<io[i+1]; j++) {
2891       if (garray[jo[j]] > rstart) break;
2892       jj[cnt++] = garray[jo[j]];
2893       CHKMEMQ;
2894     }
2895     for (k=id[i]; k<id[i+1]; k++) {
2896       if (jd[k] != i) {
2897         jj[cnt++] = rstart + jd[k];
2898         CHKMEMQ;
2899       }
2900     }
2901     for (;j<io[i+1]; j++) {
2902       jj[cnt++] = garray[jo[j]];
2903       CHKMEMQ;
2904     }
2905   }
2906   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2907   PetscFunctionReturn(0);
2908 }
2909 EXTERN_C_END
2910 
2911 EXTERN_C_BEGIN
2912 #if defined(PETSC_HAVE_MUMPS)
2913 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2914 #endif
2915 EXTERN_C_END
2916 
2917 /*MC
2918    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2919 
2920    Options Database Keys:
2921 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2922 . -mat_block_size <bs> - set the blocksize used to store the matrix
2923 - -mat_use_hash_table <fact> - use a hash table during assembly to save memory, with expansion factor <fact>
2924 
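  Example:
  A sketch (comm and A are placeholders) of selecting this type through the options
  database:

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);   /* run with -mat_type mpibaij */
.ve
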
2925   Level: beginner
2926 
2927 .seealso: MatCreateMPIBAIJ()
2928 M*/
2929 
2930 EXTERN_C_BEGIN
2931 #undef __FUNCT__
2932 #define __FUNCT__ "MatCreate_MPIBAIJ"
2933 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2934 {
2935   Mat_MPIBAIJ    *b;
2936   PetscErrorCode ierr;
2937   PetscTruth     flg;
2938 
2939   PetscFunctionBegin;
2940   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2941   B->data = (void*)b;
2942 
2943 
2944   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2945   B->mapping    = 0;
2946   B->assembled  = PETSC_FALSE;
2947 
2948   B->insertmode = NOT_SET_VALUES;
2949   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2950   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2951 
2952   /* build local table of row and column ownerships */
2953   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2954 
2955   /* build cache for off array entries formed */
2956   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2957   b->donotstash  = PETSC_FALSE;
2958   b->colmap      = PETSC_NULL;
2959   b->garray      = PETSC_NULL;
2960   b->roworiented = PETSC_TRUE;
2961 
2962   /* stuff used in block assembly */
2963   b->barray       = 0;
2964 
2965   /* stuff used for matrix vector multiply */
2966   b->lvec         = 0;
2967   b->Mvctx        = 0;
2968 
2969   /* stuff for MatGetRow() */
2970   b->rowindices   = 0;
2971   b->rowvalues    = 0;
2972   b->getrowactive = PETSC_FALSE;
2973 
2974   /* hash table stuff */
2975   b->ht           = 0;
2976   b->hd           = 0;
2977   b->ht_size      = 0;
2978   b->ht_flag      = PETSC_FALSE;
2979   b->ht_fact      = 0;
2980   b->ht_total_ct  = 0;
2981   b->ht_insert_ct = 0;
2982 
2983   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2984     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2985     if (flg) {
2986       PetscReal fact = 1.39;
2987       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2988       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2989       if (fact <= 1.0) fact = 1.39;
2990       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2991       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2992     }
2993   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2994 
2995 #if defined(PETSC_HAVE_MUMPS)
2996   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
2997 #endif
2998   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
2999                                      "MatConvert_MPIBAIJ_MPIAdj",
3000                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3001   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3002                                      "MatStoreValues_MPIBAIJ",
3003                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3004   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3005                                      "MatRetrieveValues_MPIBAIJ",
3006                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3007   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3008                                      "MatGetDiagonalBlock_MPIBAIJ",
3009                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3010   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3011                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3012                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3013   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3014                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3015                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3016   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3017                                      "MatDiagonalScaleLocal_MPIBAIJ",
3018                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3019   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3020                                      "MatSetHashTableFactor_MPIBAIJ",
3021                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3022   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3023   PetscFunctionReturn(0);
3024 }
3025 EXTERN_C_END
3026 
3027 /*MC
3028    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3029 
3030    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3031    and MATMPIBAIJ otherwise.
3032 
3033    Options Database Keys:
3034 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3035 
3036   Level: beginner
3037 
3038 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3039 M*/
3040 
3041 EXTERN_C_BEGIN
3042 #undef __FUNCT__
3043 #define __FUNCT__ "MatCreate_BAIJ"
3044 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3045 {
3046   PetscErrorCode ierr;
3047   PetscMPIInt    size;
3048 
3049   PetscFunctionBegin;
3050   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3051   if (size == 1) {
3052     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3053   } else {
3054     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3055   }
3056   PetscFunctionReturn(0);
3057 }
3058 EXTERN_C_END
3059 
3060 #undef __FUNCT__
3061 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3062 /*@C
3063    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3064    (block compressed row).  For good matrix assembly performance
3065    the user should preallocate the matrix storage by setting the parameters
3066    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3067    performance can be increased by more than a factor of 50.
3068 
3069    Collective on Mat
3070 
3071    Input Parameters:
3072 +  A - the matrix
3073 .  bs   - size of block
3074 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3075            submatrix  (same for all local rows)
3076 .  d_nnz - array containing the number of block nonzeros in the various block rows
3077            in the diagonal portion of the local submatrix (possibly different for each block
3078            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3079 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3080            submatrix (same for all local rows).
3081 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3082            off-diagonal portion of the local submatrix (possibly different for
3083            each block row) or PETSC_NULL.
3084 
3085    If the *_nnz parameter is given then the *_nz parameter is ignored
3086 
3087    Options Database Keys:
3088 +   -mat_block_size - size of the blocks to use
3089 -   -mat_use_hash_table <fact> - use a hash table during assembly to save memory, with expansion factor <fact>
3090 
3091    Notes:
3092    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3093    then it must be used on all processors that share the object for that argument.
3094 
3095    Storage Information:
3096    For a square global matrix we define each processor's diagonal portion
3097    to be its local rows and the corresponding columns (a square submatrix);
3098    each processor's off-diagonal portion encompasses the remainder of the
3099    local matrix (a rectangular submatrix).
3100 
3101    The user can specify preallocated storage for the diagonal part of
3102    the local submatrix with either d_nz or d_nnz (not both).  Set
3103    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3104    memory allocation.  Likewise, specify preallocated storage for the
3105    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3106 
3107    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3108    the figure below we depict these three local rows and all columns (0-11).
3109 
3110 .vb
3111            0 1 2 3 4 5 6 7 8 9 10 11
3112           -------------------
3113    row 3  |  o o o d d d o o o o o o
3114    row 4  |  o o o d d d o o o o o o
3115    row 5  |  o o o d d d o o o o o o
3116           -------------------
3117 .ve
3118 
3119    Thus, any entries in the d locations are stored in the d (diagonal)
3120    submatrix, and any entries in the o locations are stored in the
3121    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3122    stored simply in the MATSEQBAIJ format for compressed row storage.
3123 
3124    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3125    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3126    In general, for PDE problems in which most nonzeros are near the diagonal,
3127    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3128    or you will get TERRIBLE performance; see the users' manual chapter on
3129    matrices.
3130 
3131    You can call MatGetInfo() to get information on how effective the preallocation was;
3132    for example, check the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3133    You can also run with the option -info and look for messages containing the string
3134    malloc to see if additional memory allocation was needed.
3135 
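   Example:
   A minimal sketch (the block size, per-row counts, and sizes are illustrative only):

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve
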
3136    Level: intermediate
3137 
3138 .keywords: matrix, block, aij, compressed row, sparse, parallel
3139 
3140 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3141 @*/
3142 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3143 {
3144   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3145 
3146   PetscFunctionBegin;
3147   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3148   if (f) {
3149     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3150   }
3151   PetscFunctionReturn(0);
3152 }
3153 
3154 #undef __FUNCT__
3155 #define __FUNCT__ "MatCreateMPIBAIJ"
3156 /*@C
3157    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3158    (block compressed row).  For good matrix assembly performance
3159    the user should preallocate the matrix storage by setting the parameters
3160    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3161    performance can be increased by more than a factor of 50.
3162 
3163    Collective on MPI_Comm
3164 
3165    Input Parameters:
3166 +  comm - MPI communicator
3167 .  bs   - size of block
3168 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3169            This value should be the same as the local size used in creating the
3170            y vector for the matrix-vector product y = Ax.
3171 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3172            This value should be the same as the local size used in creating the
3173            x vector for the matrix-vector product y = Ax.
3174 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3175 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3176 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3177            submatrix  (same for all local rows)
3178 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3179            in the diagonal portion of the local submatrix (possibly different for each block
3180            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3181 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3182            submatrix (same for all local rows).
3183 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3184            off-diagonal portion of the local submatrix (possibly different for
3185            each block row) or PETSC_NULL.
3186 
3187    Output Parameter:
3188 .  A - the matrix
3189 
3190    Options Database Keys:
3191 +   -mat_block_size - size of the blocks to use
3192 -   -mat_use_hash_table <fact> - use a hash table during assembly to save memory, with expansion factor <fact>
3193 
3194    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3195    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3196    [MatXXXXSetPreallocation() is, for example, MatMPIBAIJSetPreallocation()]
3197 
3198    Notes:
3199    If the *_nnz parameter is given then the *_nz parameter is ignored
3200 
3201    A nonzero block is any block that has 1 or more nonzeros in it
3202 
3203    The user MUST specify either the local or global matrix dimensions
3204    (possibly both).
3205 
3206    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3207    then it must be used on all processors that share the object for that argument.
3208 
3209    Storage Information:
3210    For a square global matrix we define each processor's diagonal portion
3211    to be its local rows and the corresponding columns (a square submatrix);
3212    each processor's off-diagonal portion encompasses the remainder of the
3213    local matrix (a rectangular submatrix).
3214 
3215    The user can specify preallocated storage for the diagonal part of
3216    the local submatrix with either d_nz or d_nnz (not both).  Set
3217    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3218    memory allocation.  Likewise, specify preallocated storage for the
3219    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3220 
3221    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3222    the figure below we depict these three local rows and all columns (0-11).
3223 
3224 .vb
3225            0 1 2 3 4 5 6 7 8 9 10 11
3226           -------------------
3227    row 3  |  o o o d d d o o o o o o
3228    row 4  |  o o o d d d o o o o o o
3229    row 5  |  o o o d d d o o o o o o
3230           -------------------
3231 .ve
3232 
3233    Thus, any entries in the d locations are stored in the d (diagonal)
3234    submatrix, and any entries in the o locations are stored in the
3235    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3236    stored simply in the MATSEQBAIJ format for compressed row storage.
3237 
3238    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3239    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3240    In general, for PDE problems in which most nonzeros are near the diagonal,
3241    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3242    or you will get TERRIBLE performance; see the users' manual chapter on
3243    matrices.
3244 
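   Example:
   A minimal sketch (the block size, per-row counts, and global sizes are illustrative
   only):

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
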
3245    Level: intermediate
3246 
3247 .keywords: matrix, block, aij, compressed row, sparse, parallel
3248 
3249 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3250 @*/
3251 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3252 {
3253   PetscErrorCode ierr;
3254   PetscMPIInt    size;
3255 
3256   PetscFunctionBegin;
3257   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3258   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3259   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3260   if (size > 1) {
3261     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3262     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3263   } else {
3264     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3265     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3266   }
3267   PetscFunctionReturn(0);
3268 }
3269 
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3272 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3273 {
3274   Mat            mat;
3275   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3276   PetscErrorCode ierr;
3277   PetscInt       len=0;
3278 
3279   PetscFunctionBegin;
3280   *newmat       = 0;
3281   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3282   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3283   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3284   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3285 
3286   mat->factortype   = matin->factortype;
3287   mat->preallocated = PETSC_TRUE;
3288   mat->assembled    = PETSC_TRUE;
3289   mat->insertmode   = NOT_SET_VALUES;
3290 
3291   a      = (Mat_MPIBAIJ*)mat->data;
3292   mat->rmap->bs  = matin->rmap->bs;
3293   a->bs2   = oldmat->bs2;
3294   a->mbs   = oldmat->mbs;
3295   a->nbs   = oldmat->nbs;
3296   a->Mbs   = oldmat->Mbs;
3297   a->Nbs   = oldmat->Nbs;
3298 
3299   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3300   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3301 
3302   a->size         = oldmat->size;
3303   a->rank         = oldmat->rank;
3304   a->donotstash   = oldmat->donotstash;
3305   a->roworiented  = oldmat->roworiented;
3306   a->rowindices   = 0;
3307   a->rowvalues    = 0;
3308   a->getrowactive = PETSC_FALSE;
3309   a->barray       = 0;
3310   a->rstartbs     = oldmat->rstartbs;
3311   a->rendbs       = oldmat->rendbs;
3312   a->cstartbs     = oldmat->cstartbs;
3313   a->cendbs       = oldmat->cendbs;
3314 
3315   /* hash table stuff */
3316   a->ht           = 0;
3317   a->hd           = 0;
3318   a->ht_size      = 0;
3319   a->ht_flag      = oldmat->ht_flag;
3320   a->ht_fact      = oldmat->ht_fact;
3321   a->ht_total_ct  = 0;
3322   a->ht_insert_ct = 0;
3323 
3324   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3325   if (oldmat->colmap) {
3326 #if defined (PETSC_USE_CTABLE)
3327   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3328 #else
3329   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3330   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3331   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3332 #endif
3333   } else a->colmap = 0;
3334 
3335   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3336     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3337     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3338     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3339   } else a->garray = 0;
3340 
3341   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3342   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3343   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3344   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3345   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3346 
3347   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3348   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3349   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3350   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3351   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3352   *newmat = mat;
3353 
3354   PetscFunctionReturn(0);
3355 }
3356 
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatLoad_MPIBAIJ"
3359 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3360 {
3361   PetscErrorCode ierr;
3362   int            fd;
3363   PetscInt       i,nz,j,rstart,rend;
3364   PetscScalar    *vals,*buf;
3365   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3366   MPI_Status     status;
3367   PetscMPIInt    rank,size,maxnz;
3368   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3369   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3370   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3371   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3372   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3373   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3374 
3375   PetscFunctionBegin;
3376   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3377     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3378   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3379 
3380   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3381   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3382   if (!rank) {
3383     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3384     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3385     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3386   }
3387 
3388   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3389 
3390   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3391   M = header[1]; N = header[2];
3392 
3393   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3394   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3395   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3396 
3397   /* If global sizes are set, check if they are consistent with that given in the file */
3398   if (sizesset) {
3399     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3400   }
3401   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3402   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3403 
3404   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3405 
3406   /*
3407      This code adds extra rows to make sure the number of rows is
3408      divisible by the blocksize
3409   */
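  /* e.g. with M = 10 and bs = 4: Mbs = 10/4 = 2 and extra_rows = 4 - 10 + 8 = 2, so Mbs
     becomes 3 and two padded rows are appended, each later given a single 1.0 on its diagonal */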
3410   Mbs        = M/bs;
3411   extra_rows = bs - M + bs*Mbs;
3412   if (extra_rows == bs) extra_rows = 0;
3413   else                  Mbs++;
3414   if (extra_rows && !rank) {
3415     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3416   }
3417 
3418   /* determine ownership of all rows */
3419   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3420     mbs        = Mbs/size + ((Mbs % size) > rank);
3421     m          = mbs*bs;
3422   } else { /* User set */
3423     m          = newmat->rmap->n;
3424     mbs        = m/bs;
3425   }
3426   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3427   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3428 
3429   /* process 0 needs enough room for process with most rows */
3430   if (!rank) {
3431     mmax = rowners[1];
3432     for (i=2; i<size; i++) {
3433       mmax = PetscMax(mmax,rowners[i]);
3434     }
3435     mmax*=bs;
3436   } else mmax = m;
3437 
3438   rowners[0] = 0;
3439   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3440   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3441   rstart = rowners[rank];
3442   rend   = rowners[rank+1];
3443 
3444   /* distribute row lengths to all processors */
3445   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3446   if (!rank) {
3447     mend = m;
3448     if (size == 1) mend = mend - extra_rows;
3449     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3450     for (j=mend; j<m; j++) locrowlens[j] = 1;
3451     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3452     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3453     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3454     for (j=0; j<m; j++) {
3455       procsnz[0] += locrowlens[j];
3456     }
3457     for (i=1; i<size; i++) {
3458       mend = browners[i+1] - browners[i];
3459       if (i == size-1) mend = mend - extra_rows;
3460       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3461       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3462       /* calculate the number of nonzeros on each processor */
3463       for (j=0; j<browners[i+1]-browners[i]; j++) {
3464         procsnz[i] += rowlengths[j];
3465       }
3466       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3467     }
3468     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3469   } else {
3470     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3471   }
3472 
3473   if (!rank) {
3474     /* determine max buffer needed and allocate it */
3475     maxnz = procsnz[0];
3476     for (i=1; i<size; i++) {
3477       maxnz = PetscMax(maxnz,procsnz[i]);
3478     }
3479     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3480 
3481     /* read in my part of the matrix column indices  */
3482     nz     = procsnz[0];
3483     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3484     mycols = ibuf;
3485     if (size == 1)  nz -= extra_rows;
3486     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3487     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3488 
3489     /* read in each remaining processor's part (except the last) and ship it off */
3490     for (i=1; i<size-1; i++) {
3491       nz   = procsnz[i];
3492       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3493       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3494     }
3495     /* read in the stuff for the last proc */
3496     if (size != 1) {
3497       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3498       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3499       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3500       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3501     }
3502     ierr = PetscFree(cols);CHKERRQ(ierr);
3503   } else {
3504     /* determine buffer space needed for message */
3505     nz = 0;
3506     for (i=0; i<m; i++) {
3507       nz += locrowlens[i];
3508     }
3509     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3510     mycols = ibuf;
3511     /* receive message of column indices */
3512     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3513     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3514     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3515   }
3516 
3517   /* loop over local rows, determining number of off diagonal entries */
3518   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3519   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3520   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3521   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3522   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3523   rowcount = 0; nzcount = 0;
3524   for (i=0; i<mbs; i++) {
3525     dcount  = 0;
3526     odcount = 0;
3527     for (j=0; j<bs; j++) {
3528       kmax = locrowlens[rowcount];
3529       for (k=0; k<kmax; k++) {
3530         tmp = mycols[nzcount++]/bs;
3531         if (!mask[tmp]) {
3532           mask[tmp] = 1;
3533           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3534           else masked1[dcount++] = tmp;
3535         }
3536       }
3537       rowcount++;
3538     }
3539 
3540     dlens[i]  = dcount;
3541     odlens[i] = odcount;
3542 
3543     /* zero out the mask elements we set */
3544     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3545     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3546   }
3547 
3548 
3549   if (!sizesset) {
3550     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3551   }
3552   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3553 
3554   if (!rank) {
3555     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3556     /* read in my part of the matrix numerical values  */
3557     nz = procsnz[0];
3558     vals = buf;
3559     mycols = ibuf;
3560     if (size == 1)  nz -= extra_rows;
3561     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3562     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3563 
3564     /* insert into matrix */
3565     jj      = rstart*bs;
3566     for (i=0; i<m; i++) {
3567       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3568       mycols += locrowlens[i];
3569       vals   += locrowlens[i];
3570       jj++;
3571     }
3572     /* read in other processors (except the last one) and ship out */
3573     for (i=1; i<size-1; i++) {
3574       nz   = procsnz[i];
3575       vals = buf;
3576       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3577       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3578     }
3579     /* the last proc */
3580     if (size != 1){
3581       nz   = procsnz[i] - extra_rows;  /* i equals size-1 after the loop above; the padded rows are not on disk */
3582       vals = buf;
3583       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3584       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3585       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3586     }
3587     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3588   } else {
3589     /* receive numeric values */
3590     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3591 
3592     /* receive message of values */
3593     vals   = buf;
3594     mycols = ibuf;
3595     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3596     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3597     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3598 
3599     /* insert into matrix */
3600     jj      = rstart*bs;
3601     for (i=0; i<m; i++) {
3602       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3603       mycols += locrowlens[i];
3604       vals   += locrowlens[i];
3605       jj++;
3606     }
3607   }
3608   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3609   ierr = PetscFree(buf);CHKERRQ(ierr);
3610   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3611   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3612   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3613   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3614   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3615   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3616 
3617   PetscFunctionReturn(0);
3618 }
3619 
3620 #undef __FUNCT__
3621 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3622 /*@
3623    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3624 
3625    Input Parameters:
3626 +  mat  - the matrix
3627 -  fact - the factor used to compute the size of the hash table
3628 
3629    Not Collective, each process can use a different factor
3630 
3631    Level: advanced
3632 
3633   Notes:
3634    This can also be set by the command line option: -mat_use_hash_table <fact>
3635 
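   Example:
   A sketch (mat is a placeholder for an MPIBAIJ matrix):

.vb
   ierr = MatMPIBAIJSetHashTableFactor(mat,1.39);CHKERRQ(ierr);
.ve
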
3636 .keywords: matrix, hashtable, factor, HT
3637 
3638 .seealso: MatSetOption()
3639 @*/
3640 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3641 {
3642   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3643 
3644   PetscFunctionBegin;
3645   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3646   if (f) {
3647     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3648   }
3649   PetscFunctionReturn(0);
3650 }
3651 
3652 EXTERN_C_BEGIN
3653 #undef __FUNCT__
3654 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3655 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3656 {
3657   Mat_MPIBAIJ *baij;
3658 
3659   PetscFunctionBegin;
3660   baij = (Mat_MPIBAIJ*)mat->data;
3661   baij->ht_fact = fact;
3662   PetscFunctionReturn(0);
3663 }
3664 EXTERN_C_END
3665 
3666 #undef __FUNCT__
3667 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3668 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3669 {
3670   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3671   PetscFunctionBegin;
3672   *Ad     = a->A;
3673   *Ao     = a->B;
3674   *colmap = a->garray;
3675   PetscFunctionReturn(0);
3676 }
3677 
3678 /*
3679     Special version for direct calls from Fortran (to eliminate two function call overheads)
3680 */
3681 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3682 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3683 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3684 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3685 #endif
3686 
3687 #undef __FUNCT__
3688 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3689 /*@C
3690   MatMPIBAIJSetValuesBlocked - Direct Fortran call that replaces a call to MatSetValuesBlocked()
3691 
3692   Collective on Mat
3693 
3694   Input Parameters:
3695 + mat - the matrix
3696 . min - number of input rows
3697 . im - input rows
3698 . nin - number of input columns
3699 . in - input columns
3700 . v - numerical values input
3701 - addvin - INSERT_VALUES or ADD_VALUES
3702 
3703   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3704 
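  For reference, a sketch (row, col, and vals are placeholders) of the standard C call
  that this Fortran entry point replaces:

.vb
   ierr = MatSetValuesBlocked(mat,1,&row,1,&col,vals,INSERT_VALUES);CHKERRQ(ierr);
.ve
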
3705   Level: advanced
3706 
3707 .seealso:   MatSetValuesBlocked()
3708 @*/
3709 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3710 {
3711   /* convert input arguments to C version */
3712   Mat             mat = *matin;
3713   PetscInt        m = *min, n = *nin;
3714   InsertMode      addv = *addvin;
3715 
3716   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3717   const MatScalar *value;
3718   MatScalar       *barray=baij->barray;
3719   PetscTruth      roworiented = baij->roworiented;
3720   PetscErrorCode  ierr;
3721   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3722   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3723   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3724 
3725   PetscFunctionBegin;
3726   /* tasks normally handled by MatSetValuesBlocked() */
3727   if (mat->insertmode == NOT_SET_VALUES) {
3728     mat->insertmode = addv;
3729   }
3730 #if defined(PETSC_USE_DEBUG)
3731   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3732   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3733 #endif
3734   if (mat->assembled) {
3735     mat->was_assembled = PETSC_TRUE;
3736     mat->assembled     = PETSC_FALSE;
3737   }
3738   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3739 
3740 
3741   if (!barray) {
3742     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3743     baij->barray = barray;
3744   }
3745 
3746   if (roworiented) {
3747     stepval = (n-1)*bs;
3748   } else {
3749     stepval = (m-1)*bs;
3750   }
3751   for (i=0; i<m; i++) {
3752     if (im[i] < 0) continue;
3753 #if defined(PETSC_USE_DEBUG)
3754     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3755 #endif
3756     if (im[i] >= rstart && im[i] < rend) {
3757       row = im[i] - rstart;
3758       for (j=0; j<n; j++) {
3759         /* a copy is not required when there is only one block column (row-oriented) or one block row (column-oriented) */
3760         if ((roworiented) && (n == 1)) {
3761           barray = (MatScalar*)v + i*bs2;
3762         } else if((!roworiented) && (m == 1)) {
3763           barray = (MatScalar*)v + j*bs2;
3764         } else { /* Here a copy is required */
3765           if (roworiented) {
3766             value = v + i*(stepval+bs)*bs + j*bs;
3767           } else {
3768             value = v + j*(stepval+bs)*bs + i*bs;
3769           }
3770           for (ii=0; ii<bs; ii++,value+=stepval) {
3771             for (jj=0; jj<bs; jj++) {
3772               *barray++  = *value++;
3773             }
3774           }
3775           barray -= bs2;
3776         }
3777 
3778         if (in[j] >= cstart && in[j] < cend){
3779           col  = in[j] - cstart;
3780           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3781         }
3782         else if (in[j] < 0) continue;
3783 #if defined(PETSC_USE_DEBUG)
3784         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3785 #endif
3786         else {
3787           if (mat->was_assembled) {
3788             if (!baij->colmap) {
3789               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3790             }
3791 
3792 #if defined(PETSC_USE_DEBUG)
3793 #if defined (PETSC_USE_CTABLE)
3794             { PetscInt data;
3795               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3796               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3797             }
3798 #else
3799             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3800 #endif
3801 #endif
3802 #if defined (PETSC_USE_CTABLE)
3803             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3804             col  = (col - 1)/bs;
3805 #else
3806             col = (baij->colmap[in[j]] - 1)/bs;
3807 #endif
3808             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3809               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3810               col =  in[j];
3811             }
3812           }
3813           else col = in[j];
3814           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3815         }
3816       }
3817     } else {
3818       if (!baij->donotstash) {
3819         if (roworiented) {
3820           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3821         } else {
3822           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3823         }
3824       }
3825     }
3826   }
3827 
3828   /* task normally handled by MatSetValuesBlocked() */
3829   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3830   PetscFunctionReturn(0);
3831 }
3832