xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 22612f2f7cceb60caedd65384cdf99fc989f2aeb)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it it is not scalable (each processor
11 has an order N integer array but is fast to acess.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = a->ignorezeroentries;
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col --;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are either in INSERTMODE or ADDMODE */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col,other_disassembled;
319   PetscScalar    *val;
320   InsertMode     addv = mat->insertmode;
321 
322   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled, if so we must
346      also disassemble ourselfs, in order that we may reassemble. */
347   /*
348      if nonzero structure of submatrix B cannot change then we know that
349      no processor disassembled thus we can skip this stuff
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
359   }
360   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
361   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
364 
365   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
366   aij->rowvalues = 0;
367 
368   /* used by MatAXPY() */
369   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
371 
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal we use separate
502      code so that MatSetValues() is not called for each diagonal allocating
503      new memory, thus calling lots of mallocs and slowing things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added in yy until the next line, */
594     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
600     /* values actually were received in the Begin() but we need to call this nop */
601     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 EXTERN_C_BEGIN
607 #undef __FUNCT__
608 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610 {
611   MPI_Comm       comm;
612   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614   IS             Me,Notme;
615   PetscErrorCode ierr;
616   PetscInt       M,N,first,last,*notme,i;
617   PetscMPIInt    size;
618 
619   PetscFunctionBegin;
620 
621   /* Easy test: symmetric diagonal block */
622   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
624   if (!*f) PetscFunctionReturn(0);
625   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
627   if (size == 1) PetscFunctionReturn(0);
628 
629   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
631   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
632   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
633   for (i=0; i<first; i++) notme[i] = i;
634   for (i=last; i<M; i++) notme[i-last+first] = i;
635   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
636   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
637   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
638   Aoff = Aoffs[0];
639   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
640   Boff = Boffs[0];
641   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
642   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
644   ierr = ISDestroy(Me);CHKERRQ(ierr);
645   ierr = ISDestroy(Notme);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatDestroy_MPIAIJ"
705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706 {
707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711 #if defined(PETSC_USE_LOG)
712   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713 #endif
714   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
715   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
717 #if defined (PETSC_USE_CTABLE)
718   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
719 #else
720   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
721 #endif
722   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
723   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
724   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
725   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
726   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* )th processor needs space a largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
903       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
904       PetscFunctionReturn(0);
905     } else if (format == PETSC_VIEWER_ASCII_INFO) {
906       PetscInt   inodecount,inodelimit,*inodes;
907       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
908       if (inodes) {
909         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
910       } else {
911         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
912       }
913       PetscFunctionReturn(0);
914     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915       PetscFunctionReturn(0);
916     }
917   } else if (isbinary) {
918     if (size == 1) {
919       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
920       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
921     } else {
922       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
923     }
924     PetscFunctionReturn(0);
925   } else if (isdraw) {
926     PetscDraw  draw;
927     PetscTruth isnull;
928     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
929     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
930   }
931 
932   if (size == 1) {
933     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
934     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
935   } else {
936     /* assemble the entire matrix onto first processor. */
937     Mat         A;
938     Mat_SeqAIJ  *Aloc;
939     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
940     PetscScalar *a;
941 
942     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
943     if (!rank) {
944       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
945     } else {
946       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
947     }
948     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
949     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
950     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
951     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
952 
953     /* copy over the A part */
954     Aloc = (Mat_SeqAIJ*)aij->A->data;
955     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
956     row = mat->rmap.rstart;
957     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
958     for (i=0; i<m; i++) {
959       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
960       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
961     }
962     aj = Aloc->j;
963     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
964 
965     /* copy over the B part */
966     Aloc = (Mat_SeqAIJ*)aij->B->data;
967     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968     row  = mat->rmap.rstart;
969     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
970     ct   = cols;
971     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
972     for (i=0; i<m; i++) {
973       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
974       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
975     }
976     ierr = PetscFree(ct);CHKERRQ(ierr);
977     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
979     /*
980        Everyone has to call to draw the matrix since the graphics waits are
981        synchronized across all processors that share the PetscDraw object
982     */
983     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
984     if (!rank) {
985       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
986       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
987     }
988     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
989     ierr = MatDestroy(A);CHKERRQ(ierr);
990   }
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatView_MPIAIJ"
996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997 {
998   PetscErrorCode ierr;
999   PetscTruth     iascii,isdraw,issocket,isbinary;
1000 
1001   PetscFunctionBegin;
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1003   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1005   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1006   if (iascii || isdraw || isbinary || issocket) {
1007     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1008   } else {
1009     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatPermute_MPIAIJ"
1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090 {
1091   MPI_Comm       comm,pcomm;
1092   PetscInt       first,local_size,nrows,*rows;
1093   int            ntids;
1094   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1099   /* make a collective version of 'rowp' */
1100   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1101   if (pcomm==comm) {
1102     crowp = rowp;
1103   } else {
1104     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1105     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1107     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1108   }
1109   /* collect the global row permutation and invert it */
1110   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1111   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1112   if (pcomm!=comm) {
1113     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1114   }
1115   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1116   /* get the local target indices */
1117   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1123   /* the column permutation is so much easier;
1124      make a local version of 'colp' and invert it */
1125   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1127   if (ntids==1) {
1128     lcolp = colp;
1129   } else {
1130     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1131     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1133   }
1134   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1135   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1136   if (ntids>1) {
1137     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1138     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1139   }
1140   /* now we just get the submatrix */
1141   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1142   /* clean up */
1143   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1144   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.N;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NEW_NONZERO_LOCATIONS:
1207   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1208   case MAT_KEEP_ZEROED_ROWS:
1209   case MAT_NEW_NONZERO_LOCATION_ERR:
1210   case MAT_USE_INODES:
1211   case MAT_IGNORE_ZERO_ENTRIES:
1212     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1213     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1214     break;
1215   case MAT_ROW_ORIENTED:
1216     a->roworiented = flg;
1217     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1218     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1219     break;
1220   case MAT_NEW_DIAGONALS:
1221     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1222     break;
1223   case MAT_IGNORE_OFF_PROC_ENTRIES:
1224     a->donotstash = PETSC_TRUE;
1225     break;
1226   case MAT_SYMMETRIC:
1227     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1228     break;
1229   case MAT_STRUCTURALLY_SYMMETRIC:
1230   case MAT_HERMITIAN:
1231   case MAT_SYMMETRY_ETERNAL:
1232     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1233     break;
1234   default:
1235     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1236   }
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatGetRow_MPIAIJ"
1242 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1243 {
1244   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1245   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1246   PetscErrorCode ierr;
1247   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1248   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1249   PetscInt       *cmap,*idx_p;
1250 
1251   PetscFunctionBegin;
1252   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1253   mat->getrowactive = PETSC_TRUE;
1254 
1255   if (!mat->rowvalues && (idx || v)) {
1256     /*
1257         allocate enough space to hold information from the longest row.
1258     */
1259     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1260     PetscInt     max = 1,tmp;
1261     for (i=0; i<matin->rmap.n; i++) {
1262       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1263       if (max < tmp) { max = tmp; }
1264     }
1265     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1266     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1267   }
1268 
1269   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1270   lrow = row - rstart;
1271 
1272   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1273   if (!v)   {pvA = 0; pvB = 0;}
1274   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1275   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1276   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1277   nztot = nzA + nzB;
1278 
1279   cmap  = mat->garray;
1280   if (v  || idx) {
1281     if (nztot) {
1282       /* Sort by increasing column numbers, assuming A and B already sorted */
1283       PetscInt imark = -1;
1284       if (v) {
1285         *v = v_p = mat->rowvalues;
1286         for (i=0; i<nzB; i++) {
1287           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1288           else break;
1289         }
1290         imark = i;
1291         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1292         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1293       }
1294       if (idx) {
1295         *idx = idx_p = mat->rowindices;
1296         if (imark > -1) {
1297           for (i=0; i<imark; i++) {
1298             idx_p[i] = cmap[cworkB[i]];
1299           }
1300         } else {
1301           for (i=0; i<nzB; i++) {
1302             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1303             else break;
1304           }
1305           imark = i;
1306         }
1307         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1308         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1309       }
1310     } else {
1311       if (idx) *idx = 0;
1312       if (v)   *v   = 0;
1313     }
1314   }
1315   *nz = nztot;
1316   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1317   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1323 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1324 {
1325   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1326 
1327   PetscFunctionBegin;
1328   if (!aij->getrowactive) {
1329     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1330   }
1331   aij->getrowactive = PETSC_FALSE;
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatNorm_MPIAIJ"
1337 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1338 {
1339   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1340   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1341   PetscErrorCode ierr;
1342   PetscInt       i,j,cstart = mat->cmap.rstart;
1343   PetscReal      sum = 0.0;
1344   PetscScalar    *v;
1345 
1346   PetscFunctionBegin;
1347   if (aij->size == 1) {
1348     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1349   } else {
1350     if (type == NORM_FROBENIUS) {
1351       v = amat->a;
1352       for (i=0; i<amat->nz; i++) {
1353 #if defined(PETSC_USE_COMPLEX)
1354         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1355 #else
1356         sum += (*v)*(*v); v++;
1357 #endif
1358       }
1359       v = bmat->a;
1360       for (i=0; i<bmat->nz; i++) {
1361 #if defined(PETSC_USE_COMPLEX)
1362         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1363 #else
1364         sum += (*v)*(*v); v++;
1365 #endif
1366       }
1367       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1368       *norm = sqrt(*norm);
1369     } else if (type == NORM_1) { /* max column norm */
1370       PetscReal *tmp,*tmp2;
1371       PetscInt    *jj,*garray = aij->garray;
1372       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1373       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1374       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1375       *norm = 0.0;
1376       v = amat->a; jj = amat->j;
1377       for (j=0; j<amat->nz; j++) {
1378         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1379       }
1380       v = bmat->a; jj = bmat->j;
1381       for (j=0; j<bmat->nz; j++) {
1382         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1383       }
1384       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1385       for (j=0; j<mat->cmap.N; j++) {
1386         if (tmp2[j] > *norm) *norm = tmp2[j];
1387       }
1388       ierr = PetscFree(tmp);CHKERRQ(ierr);
1389       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1390     } else if (type == NORM_INFINITY) { /* max row norm */
1391       PetscReal ntemp = 0.0;
1392       for (j=0; j<aij->A->rmap.n; j++) {
1393         v = amat->a + amat->i[j];
1394         sum = 0.0;
1395         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1396           sum += PetscAbsScalar(*v); v++;
1397         }
1398         v = bmat->a + bmat->i[j];
1399         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1400           sum += PetscAbsScalar(*v); v++;
1401         }
1402         if (sum > ntemp) ntemp = sum;
1403       }
1404       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1405     } else {
1406       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1407     }
1408   }
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatTranspose_MPIAIJ"
1414 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1415 {
1416   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1417   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1418   PetscErrorCode ierr;
1419   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1420   PetscInt       cstart=A->cmap.rstart,ncol;
1421   Mat            B;
1422   PetscScalar    *array;
1423 
1424   PetscFunctionBegin;
1425   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1426 
1427   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1428   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1429   ai = Aloc->i; aj = Aloc->j;
1430   bi = Bloc->i; bj = Bloc->j;
1431   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1432   cols = d_nnz + na + 1; /* work space to be used by B part */
1433   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1434   for (i=0; i<ai[ma]; i++){
1435     d_nnz[aj[i]] ++;
1436     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1437   }
1438 
1439   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1440   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1441   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1442   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1443 
1444   /* copy over the A part */
1445   array = Aloc->a;
1446   row = A->rmap.rstart;
1447   for (i=0; i<ma; i++) {
1448     ncol = ai[i+1]-ai[i];
1449     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1450     row++; array += ncol; aj += ncol;
1451   }
1452   aj = Aloc->j;
1453   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1454 
1455   /* copy over the B part */
1456   array = Bloc->a;
1457   row = A->rmap.rstart;
1458   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1459   for (i=0; i<mb; i++) {
1460     ncol = bi[i+1]-bi[i];
1461     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1462     row++; array += ncol; cols += ncol;
1463   }
1464   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1465   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1466   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1467   if (matout) {
1468     *matout = B;
1469   } else {
1470     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1477 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1478 {
1479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1480   Mat            a = aij->A,b = aij->B;
1481   PetscErrorCode ierr;
1482   PetscInt       s1,s2,s3;
1483 
1484   PetscFunctionBegin;
1485   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1486   if (rr) {
1487     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1488     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1489     /* Overlap communication with computation. */
1490     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491   }
1492   if (ll) {
1493     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1494     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1495     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1496   }
1497   /* scale  the diagonal block */
1498   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1499 
1500   if (rr) {
1501     /* Do a scatter end and then right scale the off-diagonal block */
1502     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1504   }
1505 
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1511 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1512 {
1513   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1514   PetscErrorCode ierr;
1515 
1516   PetscFunctionBegin;
1517   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1518   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1523 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1524 {
1525   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1526   PetscErrorCode ierr;
1527 
1528   PetscFunctionBegin;
1529   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatEqual_MPIAIJ"
1535 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1536 {
1537   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1538   Mat            a,b,c,d;
1539   PetscTruth     flg;
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   a = matA->A; b = matA->B;
1544   c = matB->A; d = matB->B;
1545 
1546   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1547   if (flg) {
1548     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1549   }
1550   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatCopy_MPIAIJ"
1556 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1557 {
1558   PetscErrorCode ierr;
1559   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1560   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1561 
1562   PetscFunctionBegin;
1563   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1564   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1565     /* because of the column compression in the off-processor part of the matrix a->B,
1566        the number of columns in a->B and b->B may be different, hence we cannot call
1567        the MatCopy() directly on the two parts. If need be, we can provide a more
1568        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1569        then copying the submatrices */
1570     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1571   } else {
1572     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1573     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1580 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1581 {
1582   PetscErrorCode ierr;
1583 
1584   PetscFunctionBegin;
1585   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #include "petscblaslapack.h"
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatAXPY_MPIAIJ"
1592 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1593 {
1594   PetscErrorCode ierr;
1595   PetscInt       i;
1596   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1597   PetscBLASInt   bnz,one=1;
1598   Mat_SeqAIJ     *x,*y;
1599 
1600   PetscFunctionBegin;
1601   if (str == SAME_NONZERO_PATTERN) {
1602     PetscScalar alpha = a;
1603     x = (Mat_SeqAIJ *)xx->A->data;
1604     y = (Mat_SeqAIJ *)yy->A->data;
1605     bnz = (PetscBLASInt)x->nz;
1606     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1607     x = (Mat_SeqAIJ *)xx->B->data;
1608     y = (Mat_SeqAIJ *)yy->B->data;
1609     bnz = (PetscBLASInt)x->nz;
1610     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1611   } else if (str == SUBSET_NONZERO_PATTERN) {
1612     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1613 
1614     x = (Mat_SeqAIJ *)xx->B->data;
1615     y = (Mat_SeqAIJ *)yy->B->data;
1616     if (y->xtoy && y->XtoY != xx->B) {
1617       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1618       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1619     }
1620     if (!y->xtoy) { /* get xtoy */
1621       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1622       y->XtoY = xx->B;
1623       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1624     }
1625     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1626   } else {
1627     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1628   }
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatConjugate_MPIAIJ"
1636 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1637 {
1638 #if defined(PETSC_USE_COMPLEX)
1639   PetscErrorCode ierr;
1640   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1641 
1642   PetscFunctionBegin;
1643   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1644   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1645 #else
1646   PetscFunctionBegin;
1647 #endif
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatRealPart_MPIAIJ"
1653 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1654 {
1655   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1656   PetscErrorCode ierr;
1657 
1658   PetscFunctionBegin;
1659   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1660   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1666 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1667 {
1668   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1669   PetscErrorCode ierr;
1670 
1671   PetscFunctionBegin;
1672   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1673   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 #ifdef PETSC_HAVE_PBGL
1678 
1679 #include <boost/parallel/mpi/bsp_process_group.hpp>
1680 #include <boost/graph/distributed/ilu_default_graph.hpp>
1681 #include <boost/graph/distributed/ilu_0_block.hpp>
1682 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1683 #include <boost/graph/distributed/petsc/interface.hpp>
1684 #include <boost/multi_array.hpp>
1685 #include <boost/parallel/distributed_property_map.hpp>
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1689 /*
1690   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1691 */
1692 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1693 {
1694   namespace petsc = boost::distributed::petsc;
1695 
1696   namespace graph_dist = boost::graph::distributed;
1697   using boost::graph::distributed::ilu_default::process_group_type;
1698   using boost::graph::ilu_permuted;
1699 
1700   PetscTruth      row_identity, col_identity;
1701   PetscContainer  c;
1702   PetscInt        m, n, M, N;
1703   PetscErrorCode  ierr;
1704 
1705   PetscFunctionBegin;
1706   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1707   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1708   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1709   if (!row_identity || !col_identity) {
1710     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1711   }
1712 
1713   process_group_type pg;
1714   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1715   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1716   lgraph_type&   level_graph = *lgraph_p;
1717   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1718 
1719   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1720   ilu_permuted(level_graph);
1721 
1722   /* put together the new matrix */
1723   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1724   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1725   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1726   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1727   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1728   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1729   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1730   (*fact)->factor = FACTOR_LU;
1731 
1732   ierr = PetscContainerCreate(A->comm, &c);
1733   ierr = PetscContainerSetPointer(c, lgraph_p);
1734   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1740 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1741 {
1742   PetscFunctionBegin;
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatSolve_MPIAIJ"
1748 /*
1749   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1750 */
1751 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1752 {
1753   namespace graph_dist = boost::graph::distributed;
1754 
1755   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1756   lgraph_type*   lgraph_p;
1757   PetscContainer c;
1758   PetscErrorCode ierr;
1759 
1760   PetscFunctionBegin;
1761   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1762   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1763   ierr = VecCopy(b, x); CHKERRQ(ierr);
1764 
1765   PetscScalar* array_x;
1766   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1767   PetscInt sx;
1768   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1769 
1770   PetscScalar* array_b;
1771   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1772   PetscInt sb;
1773   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1774 
1775   lgraph_type&   level_graph = *lgraph_p;
1776   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1777 
1778   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1779   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1780                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1781 
1782   typedef boost::iterator_property_map<array_ref_type::iterator,
1783                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1784   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1785                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1786 
1787   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1788 
1789   PetscFunctionReturn(0);
1790 }
1791 #endif
1792 
1793 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1794   PetscInt       nzlocal,nsends,nrecvs;
1795   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
1796   PetscScalar    *sbuf_a,**rbuf_a;
1797   PetscErrorCode (*MatDestroy)(Mat);
1798 } Mat_Redundant;
1799 
1800 #undef __FUNCT__
1801 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1802 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1803 {
1804   PetscErrorCode       ierr;
1805   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1806   PetscInt             i;
1807 
1808   PetscFunctionBegin;
1809   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1810   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1811   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1812   for (i=0; i<redund->nrecvs; i++){
1813     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1814     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1815   }
1816   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1817   ierr = PetscFree(redund);CHKERRQ(ierr);
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatDestroy_MatRedundant"
1823 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1824 {
1825   PetscErrorCode  ierr;
1826   PetscContainer  container;
1827   Mat_Redundant   *redund=PETSC_NULL;
1828 
1829   PetscFunctionBegin;
1830   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1831   if (container) {
1832     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1833   } else {
1834     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1835   }
1836   A->ops->destroy = redund->MatDestroy;
1837   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1838   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1839   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1845 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1846 {
1847   PetscMPIInt    rank,size;
1848   MPI_Comm       comm=mat->comm;
1849   PetscErrorCode ierr;
1850   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1851   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1852   PetscInt       *rowrange=mat->rmap.range;
1853   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1854   Mat            A=aij->A,B=aij->B,C=*matredundant;
1855   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1856   PetscScalar    *sbuf_a;
1857   PetscInt       nzlocal=a->nz+b->nz;
1858   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1859   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1860   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1861   PetscScalar    *vals,*aworkA,*aworkB;
1862   PetscMPIInt    tag1,tag2,tag3,imdex;
1863   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1864                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1865   MPI_Status     recv_status,*send_status;
1866   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1867   PetscInt       **rbuf_j=PETSC_NULL;
1868   PetscScalar    **rbuf_a=PETSC_NULL;
1869   Mat_Redundant  *redund=PETSC_NULL;
1870   PetscContainer container;
1871 
1872   PetscFunctionBegin;
1873   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1874   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1875 
1876   if (reuse == MAT_REUSE_MATRIX) {
1877     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1878     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1879     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1880     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1881     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1882     if (container) {
1883       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1884     } else {
1885       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1886     }
1887     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1888 
1889     nsends    = redund->nsends;
1890     nrecvs    = redund->nrecvs;
1891     send_rank = redund->send_rank; recv_rank = send_rank + size;
1892     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1893     sbuf_j    = redund->sbuf_j;
1894     sbuf_a    = redund->sbuf_a;
1895     rbuf_j    = redund->rbuf_j;
1896     rbuf_a    = redund->rbuf_a;
1897   }
1898 
1899   if (reuse == MAT_INITIAL_MATRIX){
1900     PetscMPIInt  subrank,subsize;
1901     PetscInt     nleftover,np_subcomm;
1902     /* get the destination processors' id send_rank, nsends and nrecvs */
1903     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1904     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1905     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1906     recv_rank = send_rank + size;
1907     np_subcomm = size/nsubcomm;
1908     nleftover  = size - nsubcomm*np_subcomm;
1909     nsends = 0; nrecvs = 0;
1910     for (i=0; i<size; i++){ /* i=rank*/
1911       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1912         send_rank[nsends] = i; nsends++;
1913         recv_rank[nrecvs++] = i;
1914       }
1915     }
1916     if (rank >= size - nleftover){/* this proc is a leftover processor */
1917       i = size-nleftover-1;
1918       j = 0;
1919       while (j < nsubcomm - nleftover){
1920         send_rank[nsends++] = i;
1921         i--; j++;
1922       }
1923     }
1924 
1925     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1926       for (i=0; i<nleftover; i++){
1927         recv_rank[nrecvs++] = size-nleftover+i;
1928       }
1929     }
1930 
1931     /* allocate sbuf_j, sbuf_a */
1932     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1933     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1934     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1935   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1936 
1937   /* copy mat's local entries into the buffers */
1938   if (reuse == MAT_INITIAL_MATRIX){
1939     rownz_max = 0;
1940     rptr = sbuf_j;
1941     cols = sbuf_j + rend-rstart + 1;
1942     vals = sbuf_a;
1943     rptr[0] = 0;
1944     for (i=0; i<rend-rstart; i++){
1945       row = i + rstart;
1946       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1947       ncols  = nzA + nzB;
1948       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1949       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1950       /* load the column indices for this row into cols */
1951       lwrite = 0;
1952       for (l=0; l<nzB; l++) {
1953         if ((ctmp = bmap[cworkB[l]]) < cstart){
1954           vals[lwrite]   = aworkB[l];
1955           cols[lwrite++] = ctmp;
1956         }
1957       }
1958       for (l=0; l<nzA; l++){
1959         vals[lwrite]   = aworkA[l];
1960         cols[lwrite++] = cstart + cworkA[l];
1961       }
1962       for (l=0; l<nzB; l++) {
1963         if ((ctmp = bmap[cworkB[l]]) >= cend){
1964           vals[lwrite]   = aworkB[l];
1965           cols[lwrite++] = ctmp;
1966         }
1967       }
1968       vals += ncols;
1969       cols += ncols;
1970       rptr[i+1] = rptr[i] + ncols;
1971       if (rownz_max < ncols) rownz_max = ncols;
1972     }
1973     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
1974   } else { /* only copy matrix values into sbuf_a */
1975     rptr = sbuf_j;
1976     vals = sbuf_a;
1977     rptr[0] = 0;
1978     for (i=0; i<rend-rstart; i++){
1979       row = i + rstart;
1980       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1981       ncols  = nzA + nzB;
1982       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1983       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1984       lwrite = 0;
1985       for (l=0; l<nzB; l++) {
1986         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1987       }
1988       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1989       for (l=0; l<nzB; l++) {
1990         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1991       }
1992       vals += ncols;
1993       rptr[i+1] = rptr[i] + ncols;
1994     }
1995   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1996 
1997   /* send nzlocal to others, and recv other's nzlocal */
1998   /*--------------------------------------------------*/
1999   if (reuse == MAT_INITIAL_MATRIX){
2000     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2001     s_waits2 = s_waits3 + nsends;
2002     s_waits1 = s_waits2 + nsends;
2003     r_waits1 = s_waits1 + nsends;
2004     r_waits2 = r_waits1 + nrecvs;
2005     r_waits3 = r_waits2 + nrecvs;
2006   } else {
2007     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2008     r_waits3 = s_waits3 + nsends;
2009   }
2010 
2011   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2012   if (reuse == MAT_INITIAL_MATRIX){
2013     /* get new tags to keep the communication clean */
2014     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2015     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2016     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2017     rbuf_nz = sbuf_nz + nsends;
2018 
2019     /* post receives of other's nzlocal */
2020     for (i=0; i<nrecvs; i++){
2021       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2022     }
2023     /* send nzlocal to others */
2024     for (i=0; i<nsends; i++){
2025       sbuf_nz[i] = nzlocal;
2026       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2027     }
2028     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2029     count = nrecvs;
2030     while (count) {
2031       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2032       recv_rank[imdex] = recv_status.MPI_SOURCE;
2033       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2034       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2035 
2036       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2037       rbuf_nz[imdex] += i + 2;
2038       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2039       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2040       count--;
2041     }
2042     /* wait on sends of nzlocal */
2043     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2044     /* send mat->i,j to others, and recv from other's */
2045     /*------------------------------------------------*/
2046     for (i=0; i<nsends; i++){
2047       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2048       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2049     }
2050     /* wait on receives of mat->i,j */
2051     /*------------------------------*/
2052     count = nrecvs;
2053     while (count) {
2054       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2055       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2056       count--;
2057     }
2058     /* wait on sends of mat->i,j */
2059     /*---------------------------*/
2060     if (nsends) {
2061       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2062     }
2063   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2064 
2065   /* post receives, send and receive mat->a */
2066   /*----------------------------------------*/
2067   for (imdex=0; imdex<nrecvs; imdex++) {
2068     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2069   }
2070   for (i=0; i<nsends; i++){
2071     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2072   }
2073   count = nrecvs;
2074   while (count) {
2075     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2076     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2077     count--;
2078   }
2079   if (nsends) {
2080     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2081   }
2082 
2083   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2084 
2085   /* create redundant matrix */
2086   /*-------------------------*/
2087   if (reuse == MAT_INITIAL_MATRIX){
2088     /* compute rownz_max for preallocation */
2089     for (imdex=0; imdex<nrecvs; imdex++){
2090       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2091       rptr = rbuf_j[imdex];
2092       for (i=0; i<j; i++){
2093         ncols = rptr[i+1] - rptr[i];
2094         if (rownz_max < ncols) rownz_max = ncols;
2095       }
2096     }
2097 
2098     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2099     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2100     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2101     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2102     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2103   } else {
2104     C = *matredundant;
2105   }
2106 
2107   /* insert local matrix entries */
2108   rptr = sbuf_j;
2109   cols = sbuf_j + rend-rstart + 1;
2110   vals = sbuf_a;
2111   for (i=0; i<rend-rstart; i++){
2112     row   = i + rstart;
2113     ncols = rptr[i+1] - rptr[i];
2114     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2115     vals += ncols;
2116     cols += ncols;
2117   }
2118   /* insert received matrix entries */
2119   for (imdex=0; imdex<nrecvs; imdex++){
2120     rstart = rowrange[recv_rank[imdex]];
2121     rend   = rowrange[recv_rank[imdex]+1];
2122     rptr = rbuf_j[imdex];
2123     cols = rbuf_j[imdex] + rend-rstart + 1;
2124     vals = rbuf_a[imdex];
2125     for (i=0; i<rend-rstart; i++){
2126       row   = i + rstart;
2127       ncols = rptr[i+1] - rptr[i];
2128       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2129       vals += ncols;
2130       cols += ncols;
2131     }
2132   }
2133   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2134   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2136   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscContainer container;
2139     *matredundant = C;
2140     /* create a supporting struct and attach it to C for reuse */
2141     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2142     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2143     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2144     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2145     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2146 
2147     redund->nzlocal = nzlocal;
2148     redund->nsends  = nsends;
2149     redund->nrecvs  = nrecvs;
2150     redund->send_rank = send_rank;
2151     redund->sbuf_nz = sbuf_nz;
2152     redund->sbuf_j  = sbuf_j;
2153     redund->sbuf_a  = sbuf_a;
2154     redund->rbuf_j  = rbuf_j;
2155     redund->rbuf_a  = rbuf_a;
2156 
2157     redund->MatDestroy = C->ops->destroy;
2158     C->ops->destroy    = MatDestroy_MatRedundant;
2159   }
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 /* -------------------------------------------------------------------*/
2164 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2165        MatGetRow_MPIAIJ,
2166        MatRestoreRow_MPIAIJ,
2167        MatMult_MPIAIJ,
2168 /* 4*/ MatMultAdd_MPIAIJ,
2169        MatMultTranspose_MPIAIJ,
2170        MatMultTransposeAdd_MPIAIJ,
2171 #ifdef PETSC_HAVE_PBGL
2172        MatSolve_MPIAIJ,
2173 #else
2174        0,
2175 #endif
2176        0,
2177        0,
2178 /*10*/ 0,
2179        0,
2180        0,
2181        MatRelax_MPIAIJ,
2182        MatTranspose_MPIAIJ,
2183 /*15*/ MatGetInfo_MPIAIJ,
2184        MatEqual_MPIAIJ,
2185        MatGetDiagonal_MPIAIJ,
2186        MatDiagonalScale_MPIAIJ,
2187        MatNorm_MPIAIJ,
2188 /*20*/ MatAssemblyBegin_MPIAIJ,
2189        MatAssemblyEnd_MPIAIJ,
2190        0,
2191        MatSetOption_MPIAIJ,
2192        MatZeroEntries_MPIAIJ,
2193 /*25*/ MatZeroRows_MPIAIJ,
2194        0,
2195 #ifdef PETSC_HAVE_PBGL
2196        MatLUFactorNumeric_MPIAIJ,
2197 #else
2198        0,
2199 #endif
2200        0,
2201        0,
2202 /*30*/ MatSetUpPreallocation_MPIAIJ,
2203 #ifdef PETSC_HAVE_PBGL
2204        MatILUFactorSymbolic_MPIAIJ,
2205 #else
2206        0,
2207 #endif
2208        0,
2209        0,
2210        0,
2211 /*35*/ MatDuplicate_MPIAIJ,
2212        0,
2213        0,
2214        0,
2215        0,
2216 /*40*/ MatAXPY_MPIAIJ,
2217        MatGetSubMatrices_MPIAIJ,
2218        MatIncreaseOverlap_MPIAIJ,
2219        MatGetValues_MPIAIJ,
2220        MatCopy_MPIAIJ,
2221 /*45*/ 0,
2222        MatScale_MPIAIJ,
2223        0,
2224        0,
2225        0,
2226 /*50*/ MatSetBlockSize_MPIAIJ,
2227        0,
2228        0,
2229        0,
2230        0,
2231 /*55*/ MatFDColoringCreate_MPIAIJ,
2232        0,
2233        MatSetUnfactored_MPIAIJ,
2234        MatPermute_MPIAIJ,
2235        0,
2236 /*60*/ MatGetSubMatrix_MPIAIJ,
2237        MatDestroy_MPIAIJ,
2238        MatView_MPIAIJ,
2239        0,
2240        0,
2241 /*65*/ 0,
2242        0,
2243        0,
2244        0,
2245        0,
2246 /*70*/ 0,
2247        0,
2248        MatSetColoring_MPIAIJ,
2249 #if defined(PETSC_HAVE_ADIC)
2250        MatSetValuesAdic_MPIAIJ,
2251 #else
2252        0,
2253 #endif
2254        MatSetValuesAdifor_MPIAIJ,
2255 /*75*/ 0,
2256        0,
2257        0,
2258        0,
2259        0,
2260 /*80*/ 0,
2261        0,
2262        0,
2263        0,
2264 /*84*/ MatLoad_MPIAIJ,
2265        0,
2266        0,
2267        0,
2268        0,
2269        0,
2270 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2271        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2272        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2273        MatPtAP_Basic,
2274        MatPtAPSymbolic_MPIAIJ,
2275 /*95*/ MatPtAPNumeric_MPIAIJ,
2276        0,
2277        0,
2278        0,
2279        0,
2280 /*100*/0,
2281        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2282        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2283        MatConjugate_MPIAIJ,
2284        0,
2285 /*105*/MatSetValuesRow_MPIAIJ,
2286        MatRealPart_MPIAIJ,
2287        MatImaginaryPart_MPIAIJ,
2288        0,
2289        0,
2290 /*110*/0,
2291        MatGetRedundantMatrix_MPIAIJ};
2292 
2293 /* ----------------------------------------------------------------------------------------*/
2294 
2295 EXTERN_C_BEGIN
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2298 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2299 {
2300   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2301   PetscErrorCode ierr;
2302 
2303   PetscFunctionBegin;
2304   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2305   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2306   PetscFunctionReturn(0);
2307 }
2308 EXTERN_C_END
2309 
2310 EXTERN_C_BEGIN
2311 #undef __FUNCT__
2312 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2313 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2314 {
2315   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2316   PetscErrorCode ierr;
2317 
2318   PetscFunctionBegin;
2319   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2320   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 EXTERN_C_END
2324 
2325 #include "petscpc.h"
2326 EXTERN_C_BEGIN
2327 #undef __FUNCT__
2328 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2329 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2330 {
2331   Mat_MPIAIJ     *b;
2332   PetscErrorCode ierr;
2333   PetscInt       i;
2334 
2335   PetscFunctionBegin;
2336   B->preallocated = PETSC_TRUE;
2337   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2338   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2339   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2340   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2341 
2342   B->rmap.bs = B->cmap.bs = 1;
2343   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2344   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2345   if (d_nnz) {
2346     for (i=0; i<B->rmap.n; i++) {
2347       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2348     }
2349   }
2350   if (o_nnz) {
2351     for (i=0; i<B->rmap.n; i++) {
2352       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2353     }
2354   }
2355   b = (Mat_MPIAIJ*)B->data;
2356 
2357   /* Explicitly create 2 MATSEQAIJ matrices. */
2358   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2359   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2360   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2361   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2362   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2363   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2364   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2365   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2366 
2367   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2368   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2369 
2370   PetscFunctionReturn(0);
2371 }
2372 EXTERN_C_END
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2376 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2377 {
2378   Mat            mat;
2379   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2380   PetscErrorCode ierr;
2381 
2382   PetscFunctionBegin;
2383   *newmat       = 0;
2384   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2385   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2386   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2387   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2388   a    = (Mat_MPIAIJ*)mat->data;
2389 
2390   mat->factor       = matin->factor;
2391   mat->rmap.bs      = matin->rmap.bs;
2392   mat->assembled    = PETSC_TRUE;
2393   mat->insertmode   = NOT_SET_VALUES;
2394   mat->preallocated = PETSC_TRUE;
2395 
2396   a->size           = oldmat->size;
2397   a->rank           = oldmat->rank;
2398   a->donotstash     = oldmat->donotstash;
2399   a->roworiented    = oldmat->roworiented;
2400   a->rowindices     = 0;
2401   a->rowvalues      = 0;
2402   a->getrowactive   = PETSC_FALSE;
2403 
2404   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2405   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2406 
2407   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2408   if (oldmat->colmap) {
2409 #if defined (PETSC_USE_CTABLE)
2410     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2411 #else
2412     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2413     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2414     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2415 #endif
2416   } else a->colmap = 0;
2417   if (oldmat->garray) {
2418     PetscInt len;
2419     len  = oldmat->B->cmap.n;
2420     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2421     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2422     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2423   } else a->garray = 0;
2424 
2425   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2426   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2427   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2428   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2429   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2430   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2431   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2432   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2433   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2434   *newmat = mat;
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 #include "petscsys.h"
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatLoad_MPIAIJ"
2442 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2443 {
2444   Mat            A;
2445   PetscScalar    *vals,*svals;
2446   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2447   MPI_Status     status;
2448   PetscErrorCode ierr;
2449   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2450   PetscInt       i,nz,j,rstart,rend,mmax;
2451   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2452   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2453   PetscInt       cend,cstart,n,*rowners;
2454   int            fd;
2455 
2456   PetscFunctionBegin;
2457   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2458   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2459   if (!rank) {
2460     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2461     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2462     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2463   }
2464 
2465   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2466   M = header[1]; N = header[2];
2467   /* determine ownership of all rows */
2468   m    = M/size + ((M % size) > rank);
2469   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2470   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2471 
2472   /* First process needs enough room for process with most rows */
2473   if (!rank) {
2474     mmax       = rowners[1];
2475     for (i=2; i<size; i++) {
2476       mmax = PetscMax(mmax,rowners[i]);
2477     }
2478   } else mmax = m;
2479 
2480   rowners[0] = 0;
2481   for (i=2; i<=size; i++) {
2482     rowners[i] += rowners[i-1];
2483   }
2484   rstart = rowners[rank];
2485   rend   = rowners[rank+1];
2486 
2487   /* distribute row lengths to all processors */
2488   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2489   if (!rank) {
2490     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2491     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2492     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2493     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2494     for (j=0; j<m; j++) {
2495       procsnz[0] += ourlens[j];
2496     }
2497     for (i=1; i<size; i++) {
2498       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2499       /* calculate the number of nonzeros on each processor */
2500       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2501         procsnz[i] += rowlengths[j];
2502       }
2503       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2504     }
2505     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2506   } else {
2507     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2508   }
2509 
2510   if (!rank) {
2511     /* determine max buffer needed and allocate it */
2512     maxnz = 0;
2513     for (i=0; i<size; i++) {
2514       maxnz = PetscMax(maxnz,procsnz[i]);
2515     }
2516     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2517 
2518     /* read in my part of the matrix column indices  */
2519     nz   = procsnz[0];
2520     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2521     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2522 
2523     /* read in every one elses and ship off */
2524     for (i=1; i<size; i++) {
2525       nz   = procsnz[i];
2526       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2527       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2528     }
2529     ierr = PetscFree(cols);CHKERRQ(ierr);
2530   } else {
2531     /* determine buffer space needed for message */
2532     nz = 0;
2533     for (i=0; i<m; i++) {
2534       nz += ourlens[i];
2535     }
2536     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2537 
2538     /* receive message of column indices*/
2539     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2540     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2541     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2542   }
2543 
2544   /* determine column ownership if matrix is not square */
2545   if (N != M) {
2546     n      = N/size + ((N % size) > rank);
2547     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2548     cstart = cend - n;
2549   } else {
2550     cstart = rstart;
2551     cend   = rend;
2552     n      = cend - cstart;
2553   }
2554 
2555   /* loop over local rows, determining number of off diagonal entries */
2556   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2557   jj = 0;
2558   for (i=0; i<m; i++) {
2559     for (j=0; j<ourlens[i]; j++) {
2560       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2561       jj++;
2562     }
2563   }
2564 
2565   /* create our matrix */
2566   for (i=0; i<m; i++) {
2567     ourlens[i] -= offlens[i];
2568   }
2569   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2570   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2571   ierr = MatSetType(A,type);CHKERRQ(ierr);
2572   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2573 
2574   for (i=0; i<m; i++) {
2575     ourlens[i] += offlens[i];
2576   }
2577 
2578   if (!rank) {
2579     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2580 
2581     /* read in my part of the matrix numerical values  */
2582     nz   = procsnz[0];
2583     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2584 
2585     /* insert into matrix */
2586     jj      = rstart;
2587     smycols = mycols;
2588     svals   = vals;
2589     for (i=0; i<m; i++) {
2590       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2591       smycols += ourlens[i];
2592       svals   += ourlens[i];
2593       jj++;
2594     }
2595 
2596     /* read in other processors and ship out */
2597     for (i=1; i<size; i++) {
2598       nz   = procsnz[i];
2599       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2600       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2601     }
2602     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2603   } else {
2604     /* receive numeric values */
2605     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2606 
2607     /* receive message of values*/
2608     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2609     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2610     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2611 
2612     /* insert into matrix */
2613     jj      = rstart;
2614     smycols = mycols;
2615     svals   = vals;
2616     for (i=0; i<m; i++) {
2617       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2618       smycols += ourlens[i];
2619       svals   += ourlens[i];
2620       jj++;
2621     }
2622   }
2623   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2624   ierr = PetscFree(vals);CHKERRQ(ierr);
2625   ierr = PetscFree(mycols);CHKERRQ(ierr);
2626   ierr = PetscFree(rowners);CHKERRQ(ierr);
2627 
2628   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2629   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2630   *newmat = A;
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 #undef __FUNCT__
2635 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2636 /*
2637     Not great since it makes two copies of the submatrix, first an SeqAIJ
2638   in local and then by concatenating the local matrices the end result.
2639   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2640 */
2641 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2642 {
2643   PetscErrorCode ierr;
2644   PetscMPIInt    rank,size;
2645   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2646   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2647   Mat            *local,M,Mreuse;
2648   PetscScalar    *vwork,*aa;
2649   MPI_Comm       comm = mat->comm;
2650   Mat_SeqAIJ     *aij;
2651 
2652 
2653   PetscFunctionBegin;
2654   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2655   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2656 
2657   if (call ==  MAT_REUSE_MATRIX) {
2658     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2659     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2660     local = &Mreuse;
2661     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2662   } else {
2663     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2664     Mreuse = *local;
2665     ierr   = PetscFree(local);CHKERRQ(ierr);
2666   }
2667 
2668   /*
2669       m - number of local rows
2670       n - number of columns (same on all processors)
2671       rstart - first row in new global matrix generated
2672   */
2673   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2674   if (call == MAT_INITIAL_MATRIX) {
2675     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2676     ii  = aij->i;
2677     jj  = aij->j;
2678 
2679     /*
2680         Determine the number of non-zeros in the diagonal and off-diagonal
2681         portions of the matrix in order to do correct preallocation
2682     */
2683 
2684     /* first get start and end of "diagonal" columns */
2685     if (csize == PETSC_DECIDE) {
2686       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2687       if (mglobal == n) { /* square matrix */
2688 	nlocal = m;
2689       } else {
2690         nlocal = n/size + ((n % size) > rank);
2691       }
2692     } else {
2693       nlocal = csize;
2694     }
2695     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2696     rstart = rend - nlocal;
2697     if (rank == size - 1 && rend != n) {
2698       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2699     }
2700 
2701     /* next, compute all the lengths */
2702     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2703     olens = dlens + m;
2704     for (i=0; i<m; i++) {
2705       jend = ii[i+1] - ii[i];
2706       olen = 0;
2707       dlen = 0;
2708       for (j=0; j<jend; j++) {
2709         if (*jj < rstart || *jj >= rend) olen++;
2710         else dlen++;
2711         jj++;
2712       }
2713       olens[i] = olen;
2714       dlens[i] = dlen;
2715     }
2716     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2717     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2718     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2719     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2720     ierr = PetscFree(dlens);CHKERRQ(ierr);
2721   } else {
2722     PetscInt ml,nl;
2723 
2724     M = *newmat;
2725     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2726     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2727     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2728     /*
2729          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2730        rather than the slower MatSetValues().
2731     */
2732     M->was_assembled = PETSC_TRUE;
2733     M->assembled     = PETSC_FALSE;
2734   }
2735   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2736   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2737   ii  = aij->i;
2738   jj  = aij->j;
2739   aa  = aij->a;
2740   for (i=0; i<m; i++) {
2741     row   = rstart + i;
2742     nz    = ii[i+1] - ii[i];
2743     cwork = jj;     jj += nz;
2744     vwork = aa;     aa += nz;
2745     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2746   }
2747 
2748   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2749   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2750   *newmat = M;
2751 
2752   /* save submatrix used in processor for next request */
2753   if (call ==  MAT_INITIAL_MATRIX) {
2754     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2755     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2756   }
2757 
2758   PetscFunctionReturn(0);
2759 }
2760 
2761 EXTERN_C_BEGIN
2762 #undef __FUNCT__
2763 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2764 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2765 {
2766   PetscInt       m,cstart, cend,j,nnz,i,d;
2767   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2768   const PetscInt *JJ;
2769   PetscScalar    *values;
2770   PetscErrorCode ierr;
2771 
2772   PetscFunctionBegin;
2773   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2774 
2775   B->rmap.bs = B->cmap.bs = 1;
2776   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2777   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2778   m      = B->rmap.n;
2779   cstart = B->cmap.rstart;
2780   cend   = B->cmap.rend;
2781   rstart = B->rmap.rstart;
2782 
2783   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2784   o_nnz = d_nnz + m;
2785 
2786   for (i=0; i<m; i++) {
2787     nnz     = Ii[i+1]- Ii[i];
2788     JJ      = J + Ii[i];
2789     nnz_max = PetscMax(nnz_max,nnz);
2790     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2791     for (j=0; j<nnz; j++) {
2792       if (*JJ >= cstart) break;
2793       JJ++;
2794     }
2795     d = 0;
2796     for (; j<nnz; j++) {
2797       if (*JJ++ >= cend) break;
2798       d++;
2799     }
2800     d_nnz[i] = d;
2801     o_nnz[i] = nnz - d;
2802   }
2803   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2804   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2805 
2806   if (v) values = (PetscScalar*)v;
2807   else {
2808     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2809     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2810   }
2811 
2812   for (i=0; i<m; i++) {
2813     ii   = i + rstart;
2814     nnz  = Ii[i+1]- Ii[i];
2815     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2816   }
2817   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2818   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2819 
2820   if (!v) {
2821     ierr = PetscFree(values);CHKERRQ(ierr);
2822   }
2823   PetscFunctionReturn(0);
2824 }
2825 EXTERN_C_END
2826 
2827 #undef __FUNCT__
2828 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2829 /*@
2830    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2831    (the default parallel PETSc format).
2832 
2833    Collective on MPI_Comm
2834 
2835    Input Parameters:
2836 +  B - the matrix
2837 .  i - the indices into j for the start of each local row (starts with zero)
2838 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2839 -  v - optional values in the matrix
2840 
2841    Level: developer
2842 
2843    Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal
2844      storage format. Thus changing the values in a[] after this call will not effect the matrix values.
2845 
2846 .keywords: matrix, aij, compressed row, sparse, parallel
2847 
2848 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2849           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2850 @*/
2851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2852 {
2853   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2854 
2855   PetscFunctionBegin;
2856   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2857   if (f) {
2858     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2859   }
2860   PetscFunctionReturn(0);
2861 }
2862 
2863 #undef __FUNCT__
2864 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2865 /*@C
2866    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2867    (the default parallel PETSc format).  For good matrix assembly performance
2868    the user should preallocate the matrix storage by setting the parameters
2869    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2870    performance can be increased by more than a factor of 50.
2871 
2872    Collective on MPI_Comm
2873 
2874    Input Parameters:
2875 +  A - the matrix
2876 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2877            (same value is used for all local rows)
2878 .  d_nnz - array containing the number of nonzeros in the various rows of the
2879            DIAGONAL portion of the local submatrix (possibly different for each row)
2880            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2881            The size of this array is equal to the number of local rows, i.e 'm'.
2882            You must leave room for the diagonal entry even if it is zero.
2883 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2884            submatrix (same value is used for all local rows).
2885 -  o_nnz - array containing the number of nonzeros in the various rows of the
2886            OFF-DIAGONAL portion of the local submatrix (possibly different for
2887            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2888            structure. The size of this array is equal to the number
2889            of local rows, i.e 'm'.
2890 
2891    If the *_nnz parameter is given then the *_nz parameter is ignored
2892 
2893    The AIJ format (also called the Yale sparse matrix format or
2894    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2895    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2896 
2897    The parallel matrix is partitioned such that the first m0 rows belong to
2898    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2899    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2900 
2901    The DIAGONAL portion of the local submatrix of a processor can be defined
2902    as the submatrix which is obtained by extraction the part corresponding
2903    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2904    first row that belongs to the processor, and r2 is the last row belonging
2905    to the this processor. This is a square mxm matrix. The remaining portion
2906    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2907 
2908    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2909 
2910    Example usage:
2911 
2912    Consider the following 8x8 matrix with 34 non-zero values, that is
2913    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2914    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2915    as follows:
2916 
2917 .vb
2918             1  2  0  |  0  3  0  |  0  4
2919     Proc0   0  5  6  |  7  0  0  |  8  0
2920             9  0 10  | 11  0  0  | 12  0
2921     -------------------------------------
2922            13  0 14  | 15 16 17  |  0  0
2923     Proc1   0 18  0  | 19 20 21  |  0  0
2924             0  0  0  | 22 23  0  | 24  0
2925     -------------------------------------
2926     Proc2  25 26 27  |  0  0 28  | 29  0
2927            30  0  0  | 31 32 33  |  0 34
2928 .ve
2929 
2930    This can be represented as a collection of submatrices as:
2931 
2932 .vb
2933       A B C
2934       D E F
2935       G H I
2936 .ve
2937 
2938    Where the submatrices A,B,C are owned by proc0, D,E,F are
2939    owned by proc1, G,H,I are owned by proc2.
2940 
2941    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2942    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2943    The 'M','N' parameters are 8,8, and have the same values on all procs.
2944 
2945    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2946    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2947    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2948    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2949    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2950    matrix, ans [DF] as another SeqAIJ matrix.
2951 
2952    When d_nz, o_nz parameters are specified, d_nz storage elements are
2953    allocated for every row of the local diagonal submatrix, and o_nz
2954    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2955    One way to choose d_nz and o_nz is to use the max nonzerors per local
2956    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2957    In this case, the values of d_nz,o_nz are:
2958 .vb
2959      proc0 : dnz = 2, o_nz = 2
2960      proc1 : dnz = 3, o_nz = 2
2961      proc2 : dnz = 1, o_nz = 4
2962 .ve
2963    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2964    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2965    for proc3. i.e we are using 12+15+10=37 storage locations to store
2966    34 values.
2967 
2968    When d_nnz, o_nnz parameters are specified, the storage is specified
2969    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2970    In the above case the values for d_nnz,o_nnz are:
2971 .vb
2972      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2973      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2974      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2975 .ve
2976    Here the space allocated is sum of all the above values i.e 34, and
2977    hence pre-allocation is perfect.
2978 
2979    Level: intermediate
2980 
2981 .keywords: matrix, aij, compressed row, sparse, parallel
2982 
2983 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2984           MPIAIJ
2985 @*/
2986 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2987 {
2988   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2989 
2990   PetscFunctionBegin;
2991   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2992   if (f) {
2993     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2994   }
2995   PetscFunctionReturn(0);
2996 }
2997 
2998 #undef __FUNCT__
2999 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3000 /*@C
3001      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3002          CSR format the local rows.
3003 
3004    Collective on MPI_Comm
3005 
3006    Input Parameters:
3007 +  comm - MPI communicator
3008 .  m - number of local rows (Cannot be PETSC_DECIDE)
3009 .  n - This value should be the same as the local size used in creating the
3010        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3011        calculated if N is given) For square matrices n is almost always m.
3012 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3013 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3014 .   i - row indices
3015 .   j - column indices
3016 -   a - matrix values
3017 
3018    Output Parameter:
3019 .   mat - the matrix
3020 
3021    Level: intermediate
3022 
3023    Notes:
3024        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3025      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3026      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3027 
3028        The i and j indices are 0 based
3029 
3030 .keywords: matrix, aij, compressed row, sparse, parallel
3031 
3032 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3033           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3034 @*/
3035 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3036 {
3037   PetscErrorCode ierr;
3038 
3039  PetscFunctionBegin;
3040   if (i[0]) {
3041     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3042   }
3043   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3044   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3045   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3046   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3047   PetscFunctionReturn(0);
3048 }
3049 
3050 #undef __FUNCT__
3051 #define __FUNCT__ "MatCreateMPIAIJ"
3052 /*@C
3053    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3054    (the default parallel PETSc format).  For good matrix assembly performance
3055    the user should preallocate the matrix storage by setting the parameters
3056    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3057    performance can be increased by more than a factor of 50.
3058 
3059    Collective on MPI_Comm
3060 
3061    Input Parameters:
3062 +  comm - MPI communicator
3063 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3064            This value should be the same as the local size used in creating the
3065            y vector for the matrix-vector product y = Ax.
3066 .  n - This value should be the same as the local size used in creating the
3067        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3068        calculated if N is given) For square matrices n is almost always m.
3069 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3070 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3071 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3072            (same value is used for all local rows)
3073 .  d_nnz - array containing the number of nonzeros in the various rows of the
3074            DIAGONAL portion of the local submatrix (possibly different for each row)
3075            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3076            The size of this array is equal to the number of local rows, i.e 'm'.
3077            You must leave room for the diagonal entry even if it is zero.
3078 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3079            submatrix (same value is used for all local rows).
3080 -  o_nnz - array containing the number of nonzeros in the various rows of the
3081            OFF-DIAGONAL portion of the local submatrix (possibly different for
3082            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3083            structure. The size of this array is equal to the number
3084            of local rows, i.e 'm'.
3085 
3086    Output Parameter:
3087 .  A - the matrix
3088 
3089    Notes:
3090    If the *_nnz parameter is given then the *_nz parameter is ignored
3091 
3092    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3093    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3094    storage requirements for this matrix.
3095 
3096    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3097    processor than it must be used on all processors that share the object for
3098    that argument.
3099 
3100    The user MUST specify either the local or global matrix dimensions
3101    (possibly both).
3102 
3103    The parallel matrix is partitioned across processors such that the
3104    first m0 rows belong to process 0, the next m1 rows belong to
3105    process 1, the next m2 rows belong to process 2 etc.. where
3106    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3107    values corresponding to [m x N] submatrix.
3108 
3109    The columns are logically partitioned with the n0 columns belonging
3110    to 0th partition, the next n1 columns belonging to the next
3111    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3112 
3113    The DIAGONAL portion of the local submatrix on any given processor
3114    is the submatrix corresponding to the rows and columns m,n
3115    corresponding to the given processor. i.e diagonal matrix on
3116    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3117    etc. The remaining portion of the local submatrix [m x (N-n)]
3118    constitute the OFF-DIAGONAL portion. The example below better
3119    illustrates this concept.
3120 
3121    For a square global matrix we define each processor's diagonal portion
3122    to be its local rows and the corresponding columns (a square submatrix);
3123    each processor's off-diagonal portion encompasses the remainder of the
3124    local matrix (a rectangular submatrix).
3125 
3126    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3127 
3128    When calling this routine with a single process communicator, a matrix of
3129    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3130    type of communicator, use the construction mechanism:
3131      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3132 
3133    By default, this format uses inodes (identical nodes) when possible.
3134    We search for consecutive rows with the same nonzero structure, thereby
3135    reusing matrix information to achieve increased efficiency.
3136 
3137    Options Database Keys:
3138 +  -mat_no_inode  - Do not use inodes
3139 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3140 -  -mat_aij_oneindex - Internally use indexing starting at 1
3141         rather than 0.  Note that when calling MatSetValues(),
3142         the user still MUST index entries starting at 0!
3143 
3144 
3145    Example usage:
3146 
3147    Consider the following 8x8 matrix with 34 non-zero values, that is
3148    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3149    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3150    as follows:
3151 
3152 .vb
3153             1  2  0  |  0  3  0  |  0  4
3154     Proc0   0  5  6  |  7  0  0  |  8  0
3155             9  0 10  | 11  0  0  | 12  0
3156     -------------------------------------
3157            13  0 14  | 15 16 17  |  0  0
3158     Proc1   0 18  0  | 19 20 21  |  0  0
3159             0  0  0  | 22 23  0  | 24  0
3160     -------------------------------------
3161     Proc2  25 26 27  |  0  0 28  | 29  0
3162            30  0  0  | 31 32 33  |  0 34
3163 .ve
3164 
3165    This can be represented as a collection of submatrices as:
3166 
3167 .vb
3168       A B C
3169       D E F
3170       G H I
3171 .ve
3172 
3173    Where the submatrices A,B,C are owned by proc0, D,E,F are
3174    owned by proc1, G,H,I are owned by proc2.
3175 
3176    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3177    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3178    The 'M','N' parameters are 8,8, and have the same values on all procs.
3179 
3180    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3181    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3182    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3183    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3184    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3185    matrix, ans [DF] as another SeqAIJ matrix.
3186 
3187    When d_nz, o_nz parameters are specified, d_nz storage elements are
3188    allocated for every row of the local diagonal submatrix, and o_nz
3189    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3190    One way to choose d_nz and o_nz is to use the max nonzerors per local
3191    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3192    In this case, the values of d_nz,o_nz are:
3193 .vb
3194      proc0 : dnz = 2, o_nz = 2
3195      proc1 : dnz = 3, o_nz = 2
3196      proc2 : dnz = 1, o_nz = 4
3197 .ve
3198    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3199    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3200    for proc3. i.e we are using 12+15+10=37 storage locations to store
3201    34 values.
3202 
3203    When d_nnz, o_nnz parameters are specified, the storage is specified
3204    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3205    In the above case the values for d_nnz,o_nnz are:
3206 .vb
3207      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3208      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3209      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3210 .ve
3211    Here the space allocated is sum of all the above values i.e 34, and
3212    hence pre-allocation is perfect.
3213 
3214    Level: intermediate
3215 
3216 .keywords: matrix, aij, compressed row, sparse, parallel
3217 
3218 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3219           MPIAIJ, MatCreateMPIAIJWithArrays()
3220 @*/
3221 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3222 {
3223   PetscErrorCode ierr;
3224   PetscMPIInt    size;
3225 
3226   PetscFunctionBegin;
3227   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3228   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3229   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3230   if (size > 1) {
3231     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3232     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3233   } else {
3234     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3235     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3236   }
3237   PetscFunctionReturn(0);
3238 }
3239 
3240 #undef __FUNCT__
3241 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3242 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3243 {
3244   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3245 
3246   PetscFunctionBegin;
3247   *Ad     = a->A;
3248   *Ao     = a->B;
3249   *colmap = a->garray;
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 #undef __FUNCT__
3254 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3255 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3256 {
3257   PetscErrorCode ierr;
3258   PetscInt       i;
3259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3260 
3261   PetscFunctionBegin;
3262   if (coloring->ctype == IS_COLORING_GLOBAL) {
3263     ISColoringValue *allcolors,*colors;
3264     ISColoring      ocoloring;
3265 
3266     /* set coloring for diagonal portion */
3267     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3268 
3269     /* set coloring for off-diagonal portion */
3270     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3271     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3272     for (i=0; i<a->B->cmap.n; i++) {
3273       colors[i] = allcolors[a->garray[i]];
3274     }
3275     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3276     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3277     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3278     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3279   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3280     ISColoringValue *colors;
3281     PetscInt        *larray;
3282     ISColoring      ocoloring;
3283 
3284     /* set coloring for diagonal portion */
3285     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3286     for (i=0; i<a->A->cmap.n; i++) {
3287       larray[i] = i + A->cmap.rstart;
3288     }
3289     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3290     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3291     for (i=0; i<a->A->cmap.n; i++) {
3292       colors[i] = coloring->colors[larray[i]];
3293     }
3294     ierr = PetscFree(larray);CHKERRQ(ierr);
3295     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3296     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3297     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3298 
3299     /* set coloring for off-diagonal portion */
3300     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3301     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3302     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3303     for (i=0; i<a->B->cmap.n; i++) {
3304       colors[i] = coloring->colors[larray[i]];
3305     }
3306     ierr = PetscFree(larray);CHKERRQ(ierr);
3307     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3308     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3309     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3310   } else {
3311     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3312   }
3313 
3314   PetscFunctionReturn(0);
3315 }
3316 
3317 #if defined(PETSC_HAVE_ADIC)
3318 #undef __FUNCT__
3319 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3320 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3321 {
3322   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3323   PetscErrorCode ierr;
3324 
3325   PetscFunctionBegin;
3326   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3327   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3328   PetscFunctionReturn(0);
3329 }
3330 #endif
3331 
3332 #undef __FUNCT__
3333 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3334 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3335 {
3336   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3337   PetscErrorCode ierr;
3338 
3339   PetscFunctionBegin;
3340   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3341   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3342   PetscFunctionReturn(0);
3343 }
3344 
3345 #undef __FUNCT__
3346 #define __FUNCT__ "MatMerge"
3347 /*@C
3348       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3349                  matrices from each processor
3350 
3351     Collective on MPI_Comm
3352 
3353    Input Parameters:
3354 +    comm - the communicators the parallel matrix will live on
3355 .    inmat - the input sequential matrices
3356 .    n - number of local columns (or PETSC_DECIDE)
3357 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3358 
3359    Output Parameter:
3360 .    outmat - the parallel matrix generated
3361 
3362     Level: advanced
3363 
3364    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3365 
3366 @*/
3367 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3368 {
3369   PetscErrorCode ierr;
3370   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3371   PetscInt       *indx;
3372   PetscScalar    *values;
3373 
3374   PetscFunctionBegin;
3375   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3376   if (scall == MAT_INITIAL_MATRIX){
3377     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3378     if (n == PETSC_DECIDE){
3379       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3380     }
3381     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3382     rstart -= m;
3383 
3384     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3385     for (i=0;i<m;i++) {
3386       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3387       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3388       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3389     }
3390     /* This routine will ONLY return MPIAIJ type matrix */
3391     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3392     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3393     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3394     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3395     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3396 
3397   } else if (scall == MAT_REUSE_MATRIX){
3398     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3399   } else {
3400     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3401   }
3402 
3403   for (i=0;i<m;i++) {
3404     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3405     Ii    = i + rstart;
3406     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3407     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3408   }
3409   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3410   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3411   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3412 
3413   PetscFunctionReturn(0);
3414 }
3415 
3416 #undef __FUNCT__
3417 #define __FUNCT__ "MatFileSplit"
3418 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3419 {
3420   PetscErrorCode    ierr;
3421   PetscMPIInt       rank;
3422   PetscInt          m,N,i,rstart,nnz;
3423   size_t            len;
3424   const PetscInt    *indx;
3425   PetscViewer       out;
3426   char              *name;
3427   Mat               B;
3428   const PetscScalar *values;
3429 
3430   PetscFunctionBegin;
3431   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3432   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3433   /* Should this be the type of the diagonal block of A? */
3434   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3435   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3436   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3437   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3438   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3439   for (i=0;i<m;i++) {
3440     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3441     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3442     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3443   }
3444   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3445   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3446 
3447   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3448   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3449   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3450   sprintf(name,"%s.%d",outfile,rank);
3451   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3452   ierr = PetscFree(name);
3453   ierr = MatView(B,out);CHKERRQ(ierr);
3454   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3455   ierr = MatDestroy(B);CHKERRQ(ierr);
3456   PetscFunctionReturn(0);
3457 }
3458 
3459 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3460 #undef __FUNCT__
3461 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3462 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3463 {
3464   PetscErrorCode       ierr;
3465   Mat_Merge_SeqsToMPI  *merge;
3466   PetscContainer       container;
3467 
3468   PetscFunctionBegin;
3469   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3470   if (container) {
3471     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3472     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3473     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3474     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3475     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3476     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3477     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3478     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3479     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3480     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3481     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3482     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3483 
3484     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3485     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3486   }
3487   ierr = PetscFree(merge);CHKERRQ(ierr);
3488 
3489   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3490   PetscFunctionReturn(0);
3491 }
3492 
3493 #include "src/mat/utils/freespace.h"
3494 #include "petscbt.h"
3495 static PetscEvent logkey_seqstompinum = 0;
3496 #undef __FUNCT__
3497 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3498 /*@C
3499       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3500                  matrices from each processor
3501 
3502     Collective on MPI_Comm
3503 
3504    Input Parameters:
3505 +    comm - the communicators the parallel matrix will live on
3506 .    seqmat - the input sequential matrices
3507 .    m - number of local rows (or PETSC_DECIDE)
3508 .    n - number of local columns (or PETSC_DECIDE)
3509 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3510 
3511    Output Parameter:
3512 .    mpimat - the parallel matrix generated
3513 
3514     Level: advanced
3515 
3516    Notes:
3517      The dimensions of the sequential matrix in each processor MUST be the same.
3518      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3519      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3520 @*/
3521 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3522 {
3523   PetscErrorCode       ierr;
3524   MPI_Comm             comm=mpimat->comm;
3525   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3526   PetscMPIInt          size,rank,taga,*len_s;
3527   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3528   PetscInt             proc,m;
3529   PetscInt             **buf_ri,**buf_rj;
3530   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3531   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3532   MPI_Request          *s_waits,*r_waits;
3533   MPI_Status           *status;
3534   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3535   Mat_Merge_SeqsToMPI  *merge;
3536   PetscContainer       container;
3537 
3538   PetscFunctionBegin;
3539   if (!logkey_seqstompinum) {
3540     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3541   }
3542   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3543 
3544   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3545   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3546 
3547   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3548   if (container) {
3549     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3550   }
3551   bi     = merge->bi;
3552   bj     = merge->bj;
3553   buf_ri = merge->buf_ri;
3554   buf_rj = merge->buf_rj;
3555 
3556   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3557   owners = merge->rowmap.range;
3558   len_s  = merge->len_s;
3559 
3560   /* send and recv matrix values */
3561   /*-----------------------------*/
3562   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3563   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3564 
3565   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3566   for (proc=0,k=0; proc<size; proc++){
3567     if (!len_s[proc]) continue;
3568     i = owners[proc];
3569     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3570     k++;
3571   }
3572 
3573   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3574   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3575   ierr = PetscFree(status);CHKERRQ(ierr);
3576 
3577   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3578   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3579 
3580   /* insert mat values of mpimat */
3581   /*----------------------------*/
3582   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3583   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3584   nextrow = buf_ri_k + merge->nrecv;
3585   nextai  = nextrow + merge->nrecv;
3586 
3587   for (k=0; k<merge->nrecv; k++){
3588     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3589     nrows = *(buf_ri_k[k]);
3590     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3591     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3592   }
3593 
3594   /* set values of ba */
3595   m = merge->rowmap.n;
3596   for (i=0; i<m; i++) {
3597     arow = owners[rank] + i;
3598     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3599     bnzi = bi[i+1] - bi[i];
3600     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3601 
3602     /* add local non-zero vals of this proc's seqmat into ba */
3603     anzi = ai[arow+1] - ai[arow];
3604     aj   = a->j + ai[arow];
3605     aa   = a->a + ai[arow];
3606     nextaj = 0;
3607     for (j=0; nextaj<anzi; j++){
3608       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3609         ba_i[j] += aa[nextaj++];
3610       }
3611     }
3612 
3613     /* add received vals into ba */
3614     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3615       /* i-th row */
3616       if (i == *nextrow[k]) {
3617         anzi = *(nextai[k]+1) - *nextai[k];
3618         aj   = buf_rj[k] + *(nextai[k]);
3619         aa   = abuf_r[k] + *(nextai[k]);
3620         nextaj = 0;
3621         for (j=0; nextaj<anzi; j++){
3622           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3623             ba_i[j] += aa[nextaj++];
3624           }
3625         }
3626         nextrow[k]++; nextai[k]++;
3627       }
3628     }
3629     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3630   }
3631   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3633 
3634   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3635   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3636   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3637   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3638   PetscFunctionReturn(0);
3639 }
3640 
3641 static PetscEvent logkey_seqstompisym = 0;
3642 #undef __FUNCT__
3643 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3644 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3645 {
3646   PetscErrorCode       ierr;
3647   Mat                  B_mpi;
3648   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3649   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3650   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3651   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3652   PetscInt             len,proc,*dnz,*onz;
3653   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3654   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3655   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3656   MPI_Status           *status;
3657   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3658   PetscBT              lnkbt;
3659   Mat_Merge_SeqsToMPI  *merge;
3660   PetscContainer       container;
3661 
3662   PetscFunctionBegin;
3663   if (!logkey_seqstompisym) {
3664     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3665   }
3666   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3667 
3668   /* make sure it is a PETSc comm */
3669   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3670   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3671   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3672 
3673   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3674   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3675 
3676   /* determine row ownership */
3677   /*---------------------------------------------------------*/
3678   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3679   merge->rowmap.n = m;
3680   merge->rowmap.N = M;
3681   merge->rowmap.bs = 1;
3682   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3683   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3684   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3685 
3686   m      = merge->rowmap.n;
3687   M      = merge->rowmap.N;
3688   owners = merge->rowmap.range;
3689 
3690   /* determine the number of messages to send, their lengths */
3691   /*---------------------------------------------------------*/
3692   len_s  = merge->len_s;
3693 
3694   len = 0;  /* length of buf_si[] */
3695   merge->nsend = 0;
3696   for (proc=0; proc<size; proc++){
3697     len_si[proc] = 0;
3698     if (proc == rank){
3699       len_s[proc] = 0;
3700     } else {
3701       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3702       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3703     }
3704     if (len_s[proc]) {
3705       merge->nsend++;
3706       nrows = 0;
3707       for (i=owners[proc]; i<owners[proc+1]; i++){
3708         if (ai[i+1] > ai[i]) nrows++;
3709       }
3710       len_si[proc] = 2*(nrows+1);
3711       len += len_si[proc];
3712     }
3713   }
3714 
3715   /* determine the number and length of messages to receive for ij-structure */
3716   /*-------------------------------------------------------------------------*/
3717   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3718   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3719 
3720   /* post the Irecv of j-structure */
3721   /*-------------------------------*/
3722   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3723   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3724 
3725   /* post the Isend of j-structure */
3726   /*--------------------------------*/
3727   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3728   sj_waits = si_waits + merge->nsend;
3729 
3730   for (proc=0, k=0; proc<size; proc++){
3731     if (!len_s[proc]) continue;
3732     i = owners[proc];
3733     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3734     k++;
3735   }
3736 
3737   /* receives and sends of j-structure are complete */
3738   /*------------------------------------------------*/
3739   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3740   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3741 
3742   /* send and recv i-structure */
3743   /*---------------------------*/
3744   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3745   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3746 
3747   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3748   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3749   for (proc=0,k=0; proc<size; proc++){
3750     if (!len_s[proc]) continue;
3751     /* form outgoing message for i-structure:
3752          buf_si[0]:                 nrows to be sent
3753                [1:nrows]:           row index (global)
3754                [nrows+1:2*nrows+1]: i-structure index
3755     */
3756     /*-------------------------------------------*/
3757     nrows = len_si[proc]/2 - 1;
3758     buf_si_i    = buf_si + nrows+1;
3759     buf_si[0]   = nrows;
3760     buf_si_i[0] = 0;
3761     nrows = 0;
3762     for (i=owners[proc]; i<owners[proc+1]; i++){
3763       anzi = ai[i+1] - ai[i];
3764       if (anzi) {
3765         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3766         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3767         nrows++;
3768       }
3769     }
3770     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3771     k++;
3772     buf_si += len_si[proc];
3773   }
3774 
3775   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3776   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3777 
3778   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3779   for (i=0; i<merge->nrecv; i++){
3780     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3781   }
3782 
3783   ierr = PetscFree(len_si);CHKERRQ(ierr);
3784   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3785   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3786   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3787   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3788   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3789   ierr = PetscFree(status);CHKERRQ(ierr);
3790 
3791   /* compute a local seq matrix in each processor */
3792   /*----------------------------------------------*/
3793   /* allocate bi array and free space for accumulating nonzero column info */
3794   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3795   bi[0] = 0;
3796 
3797   /* create and initialize a linked list */
3798   nlnk = N+1;
3799   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3800 
3801   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3802   len = 0;
3803   len  = ai[owners[rank+1]] - ai[owners[rank]];
3804   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3805   current_space = free_space;
3806 
3807   /* determine symbolic info for each local row */
3808   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3809   nextrow = buf_ri_k + merge->nrecv;
3810   nextai  = nextrow + merge->nrecv;
3811   for (k=0; k<merge->nrecv; k++){
3812     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3813     nrows = *buf_ri_k[k];
3814     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3815     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3816   }
3817 
3818   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3819   len = 0;
3820   for (i=0;i<m;i++) {
3821     bnzi   = 0;
3822     /* add local non-zero cols of this proc's seqmat into lnk */
3823     arow   = owners[rank] + i;
3824     anzi   = ai[arow+1] - ai[arow];
3825     aj     = a->j + ai[arow];
3826     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3827     bnzi += nlnk;
3828     /* add received col data into lnk */
3829     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3830       if (i == *nextrow[k]) { /* i-th row */
3831         anzi = *(nextai[k]+1) - *nextai[k];
3832         aj   = buf_rj[k] + *nextai[k];
3833         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3834         bnzi += nlnk;
3835         nextrow[k]++; nextai[k]++;
3836       }
3837     }
3838     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3839 
3840     /* if free space is not available, make more free space */
3841     if (current_space->local_remaining<bnzi) {
3842       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3843       nspacedouble++;
3844     }
3845     /* copy data into free space, then initialize lnk */
3846     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3847     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3848 
3849     current_space->array           += bnzi;
3850     current_space->local_used      += bnzi;
3851     current_space->local_remaining -= bnzi;
3852 
3853     bi[i+1] = bi[i] + bnzi;
3854   }
3855 
3856   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3857 
3858   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3859   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3860   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3861 
3862   /* create symbolic parallel matrix B_mpi */
3863   /*---------------------------------------*/
3864   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3865   if (n==PETSC_DECIDE) {
3866     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3867   } else {
3868     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3869   }
3870   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3871   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3872   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3873 
3874   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3875   B_mpi->assembled     = PETSC_FALSE;
3876   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3877   merge->bi            = bi;
3878   merge->bj            = bj;
3879   merge->buf_ri        = buf_ri;
3880   merge->buf_rj        = buf_rj;
3881   merge->coi           = PETSC_NULL;
3882   merge->coj           = PETSC_NULL;
3883   merge->owners_co     = PETSC_NULL;
3884 
3885   /* attach the supporting struct to B_mpi for reuse */
3886   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3887   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3888   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3889   *mpimat = B_mpi;
3890 
3891   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3892   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3893   PetscFunctionReturn(0);
3894 }
3895 
3896 static PetscEvent logkey_seqstompi = 0;
3897 #undef __FUNCT__
3898 #define __FUNCT__ "MatMerge_SeqsToMPI"
3899 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3900 {
3901   PetscErrorCode   ierr;
3902 
3903   PetscFunctionBegin;
3904   if (!logkey_seqstompi) {
3905     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3906   }
3907   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3908   if (scall == MAT_INITIAL_MATRIX){
3909     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3910   }
3911   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3912   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3913   PetscFunctionReturn(0);
3914 }
3915 static PetscEvent logkey_getlocalmat = 0;
3916 #undef __FUNCT__
3917 #define __FUNCT__ "MatGetLocalMat"
3918 /*@C
3919      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3920 
3921     Not Collective
3922 
3923    Input Parameters:
3924 +    A - the matrix
3925 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3926 
3927    Output Parameter:
3928 .    A_loc - the local sequential matrix generated
3929 
3930     Level: developer
3931 
3932 @*/
3933 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3934 {
3935   PetscErrorCode  ierr;
3936   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3937   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3938   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3939   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3940   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3941   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3942 
3943   PetscFunctionBegin;
3944   if (!logkey_getlocalmat) {
3945     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3946   }
3947   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3948   if (scall == MAT_INITIAL_MATRIX){
3949     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3950     ci[0] = 0;
3951     for (i=0; i<am; i++){
3952       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3953     }
3954     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3955     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3956     k = 0;
3957     for (i=0; i<am; i++) {
3958       ncols_o = bi[i+1] - bi[i];
3959       ncols_d = ai[i+1] - ai[i];
3960       /* off-diagonal portion of A */
3961       for (jo=0; jo<ncols_o; jo++) {
3962         col = cmap[*bj];
3963         if (col >= cstart) break;
3964         cj[k]   = col; bj++;
3965         ca[k++] = *ba++;
3966       }
3967       /* diagonal portion of A */
3968       for (j=0; j<ncols_d; j++) {
3969         cj[k]   = cstart + *aj++;
3970         ca[k++] = *aa++;
3971       }
3972       /* off-diagonal portion of A */
3973       for (j=jo; j<ncols_o; j++) {
3974         cj[k]   = cmap[*bj++];
3975         ca[k++] = *ba++;
3976       }
3977     }
3978     /* put together the new matrix */
3979     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3980     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3981     /* Since these are PETSc arrays, change flags to free them as necessary. */
3982     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3983     mat->free_a  = PETSC_TRUE;
3984     mat->free_ij = PETSC_TRUE;
3985     mat->nonew   = 0;
3986   } else if (scall == MAT_REUSE_MATRIX){
3987     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3988     ci = mat->i; cj = mat->j; ca = mat->a;
3989     for (i=0; i<am; i++) {
3990       /* off-diagonal portion of A */
3991       ncols_o = bi[i+1] - bi[i];
3992       for (jo=0; jo<ncols_o; jo++) {
3993         col = cmap[*bj];
3994         if (col >= cstart) break;
3995         *ca++ = *ba++; bj++;
3996       }
3997       /* diagonal portion of A */
3998       ncols_d = ai[i+1] - ai[i];
3999       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4000       /* off-diagonal portion of A */
4001       for (j=jo; j<ncols_o; j++) {
4002         *ca++ = *ba++; bj++;
4003       }
4004     }
4005   } else {
4006     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4007   }
4008 
4009   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4010   PetscFunctionReturn(0);
4011 }
4012 
4013 static PetscEvent logkey_getlocalmatcondensed = 0;
4014 #undef __FUNCT__
4015 #define __FUNCT__ "MatGetLocalMatCondensed"
4016 /*@C
4017      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4018 
4019     Not Collective
4020 
4021    Input Parameters:
4022 +    A - the matrix
4023 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4024 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4025 
4026    Output Parameter:
4027 .    A_loc - the local sequential matrix generated
4028 
4029     Level: developer
4030 
4031 @*/
4032 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4033 {
4034   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4035   PetscErrorCode    ierr;
4036   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4037   IS                isrowa,iscola;
4038   Mat               *aloc;
4039 
4040   PetscFunctionBegin;
4041   if (!logkey_getlocalmatcondensed) {
4042     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
4043   }
4044   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4045   if (!row){
4046     start = A->rmap.rstart; end = A->rmap.rend;
4047     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4048   } else {
4049     isrowa = *row;
4050   }
4051   if (!col){
4052     start = A->cmap.rstart;
4053     cmap  = a->garray;
4054     nzA   = a->A->cmap.n;
4055     nzB   = a->B->cmap.n;
4056     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4057     ncols = 0;
4058     for (i=0; i<nzB; i++) {
4059       if (cmap[i] < start) idx[ncols++] = cmap[i];
4060       else break;
4061     }
4062     imark = i;
4063     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4064     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4065     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4066     ierr = PetscFree(idx);CHKERRQ(ierr);
4067   } else {
4068     iscola = *col;
4069   }
4070   if (scall != MAT_INITIAL_MATRIX){
4071     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4072     aloc[0] = *A_loc;
4073   }
4074   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4075   *A_loc = aloc[0];
4076   ierr = PetscFree(aloc);CHKERRQ(ierr);
4077   if (!row){
4078     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4079   }
4080   if (!col){
4081     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4082   }
4083   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4084   PetscFunctionReturn(0);
4085 }
4086 
4087 static PetscEvent logkey_GetBrowsOfAcols = 0;
4088 #undef __FUNCT__
4089 #define __FUNCT__ "MatGetBrowsOfAcols"
4090 /*@C
4091     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4092 
4093     Collective on Mat
4094 
4095    Input Parameters:
4096 +    A,B - the matrices in mpiaij format
4097 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4098 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4099 
4100    Output Parameter:
4101 +    rowb, colb - index sets of rows and columns of B to extract
4102 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
4103 -    B_seq - the sequential matrix generated
4104 
4105     Level: developer
4106 
4107 @*/
4108 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4109 {
4110   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4111   PetscErrorCode    ierr;
4112   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4113   IS                isrowb,iscolb;
4114   Mat               *bseq;
4115 
4116   PetscFunctionBegin;
4117   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4118     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4119   }
4120   if (!logkey_GetBrowsOfAcols) {
4121     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
4122   }
4123   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4124 
4125   if (scall == MAT_INITIAL_MATRIX){
4126     start = A->cmap.rstart;
4127     cmap  = a->garray;
4128     nzA   = a->A->cmap.n;
4129     nzB   = a->B->cmap.n;
4130     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4131     ncols = 0;
4132     for (i=0; i<nzB; i++) {  /* row < local row index */
4133       if (cmap[i] < start) idx[ncols++] = cmap[i];
4134       else break;
4135     }
4136     imark = i;
4137     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4138     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4139     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4140     ierr = PetscFree(idx);CHKERRQ(ierr);
4141     *brstart = imark;
4142     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4143   } else {
4144     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4145     isrowb = *rowb; iscolb = *colb;
4146     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4147     bseq[0] = *B_seq;
4148   }
4149   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4150   *B_seq = bseq[0];
4151   ierr = PetscFree(bseq);CHKERRQ(ierr);
4152   if (!rowb){
4153     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4154   } else {
4155     *rowb = isrowb;
4156   }
4157   if (!colb){
4158     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4159   } else {
4160     *colb = iscolb;
4161   }
4162   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4163   PetscFunctionReturn(0);
4164 }
4165 
4166 static PetscEvent logkey_GetBrowsOfAocols = 0;
4167 #undef __FUNCT__
4168 #define __FUNCT__ "MatGetBrowsOfAoCols"
4169 /*@C
4170     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4171     of the OFF-DIAGONAL portion of local A
4172 
4173     Collective on Mat
4174 
4175    Input Parameters:
4176 +    A,B - the matrices in mpiaij format
4177 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4178 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4179 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4180 
4181    Output Parameter:
4182 +    B_oth - the sequential matrix generated
4183 
4184     Level: developer
4185 
4186 @*/
4187 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4188 {
4189   VecScatter_MPI_General *gen_to,*gen_from;
4190   PetscErrorCode         ierr;
4191   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4192   Mat_SeqAIJ             *b_oth;
4193   VecScatter             ctx=a->Mvctx;
4194   MPI_Comm               comm=ctx->comm;
4195   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4196   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4197   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4198   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4199   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4200   MPI_Status             *sstatus,rstatus;
4201   PetscInt               *cols,sbs,rbs;
4202   PetscScalar            *vals;
4203 
4204   PetscFunctionBegin;
4205   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4206     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4207   }
4208   if (!logkey_GetBrowsOfAocols) {
4209     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
4210   }
4211   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4212   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4213 
4214   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4215   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4216   rvalues  = gen_from->values; /* holds the length of receiving row */
4217   svalues  = gen_to->values;   /* holds the length of sending row */
4218   nrecvs   = gen_from->n;
4219   nsends   = gen_to->n;
4220 
4221   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4222   srow     = gen_to->indices;   /* local row index to be sent */
4223   sstarts  = gen_to->starts;
4224   sprocs   = gen_to->procs;
4225   sstatus  = gen_to->sstatus;
4226   sbs      = gen_to->bs;
4227   rstarts  = gen_from->starts;
4228   rprocs   = gen_from->procs;
4229   rbs      = gen_from->bs;
4230 
4231   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4232   if (scall == MAT_INITIAL_MATRIX){
4233     /* i-array */
4234     /*---------*/
4235     /*  post receives */
4236     for (i=0; i<nrecvs; i++){
4237       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4238       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4239       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4240     }
4241 
4242     /* pack the outgoing message */
4243     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4244     rstartsj = sstartsj + nsends +1;
4245     sstartsj[0] = 0;  rstartsj[0] = 0;
4246     len = 0; /* total length of j or a array to be sent */
4247     k = 0;
4248     for (i=0; i<nsends; i++){
4249       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4250       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4251       for (j=0; j<nrows; j++) {
4252         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4253         for (l=0; l<sbs; l++){
4254           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4255           rowlen[j*sbs+l] = ncols;
4256           len += ncols;
4257           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4258         }
4259         k++;
4260       }
4261       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4262       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4263     }
4264     /* recvs and sends of i-array are completed */
4265     i = nrecvs;
4266     while (i--) {
4267       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4268     }
4269     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4270 
4271     /* allocate buffers for sending j and a arrays */
4272     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4273     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4274 
4275     /* create i-array of B_oth */
4276     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4277     b_othi[0] = 0;
4278     len = 0; /* total length of j or a array to be received */
4279     k = 0;
4280     for (i=0; i<nrecvs; i++){
4281       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4282       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4283       for (j=0; j<nrows; j++) {
4284         b_othi[k+1] = b_othi[k] + rowlen[j];
4285         len += rowlen[j]; k++;
4286       }
4287       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4288     }
4289 
4290     /* allocate space for j and a arrrays of B_oth */
4291     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4292     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4293 
4294     /* j-array */
4295     /*---------*/
4296     /*  post receives of j-array */
4297     for (i=0; i<nrecvs; i++){
4298       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4299       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4300     }
4301 
4302     /* pack the outgoing message j-array */
4303     k = 0;
4304     for (i=0; i<nsends; i++){
4305       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4306       bufJ = bufj+sstartsj[i];
4307       for (j=0; j<nrows; j++) {
4308         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4309         for (ll=0; ll<sbs; ll++){
4310           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4311           for (l=0; l<ncols; l++){
4312             *bufJ++ = cols[l];
4313           }
4314           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4315         }
4316       }
4317       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4318     }
4319 
4320     /* recvs and sends of j-array are completed */
4321     i = nrecvs;
4322     while (i--) {
4323       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4324     }
4325     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4326   } else if (scall == MAT_REUSE_MATRIX){
4327     sstartsj = *startsj;
4328     rstartsj = sstartsj + nsends +1;
4329     bufa     = *bufa_ptr;
4330     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4331     b_otha   = b_oth->a;
4332   } else {
4333     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4334   }
4335 
4336   /* a-array */
4337   /*---------*/
4338   /*  post receives of a-array */
4339   for (i=0; i<nrecvs; i++){
4340     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4341     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4342   }
4343 
4344   /* pack the outgoing message a-array */
4345   k = 0;
4346   for (i=0; i<nsends; i++){
4347     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4348     bufA = bufa+sstartsj[i];
4349     for (j=0; j<nrows; j++) {
4350       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4351       for (ll=0; ll<sbs; ll++){
4352         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4353         for (l=0; l<ncols; l++){
4354           *bufA++ = vals[l];
4355         }
4356         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4357       }
4358     }
4359     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4360   }
4361   /* recvs and sends of a-array are completed */
4362   i = nrecvs;
4363   while (i--) {
4364     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4365   }
4366   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4367   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4368 
4369   if (scall == MAT_INITIAL_MATRIX){
4370     /* put together the new matrix */
4371     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4372 
4373     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4374     /* Since these are PETSc arrays, change flags to free them as necessary. */
4375     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4376     b_oth->free_a  = PETSC_TRUE;
4377     b_oth->free_ij = PETSC_TRUE;
4378     b_oth->nonew   = 0;
4379 
4380     ierr = PetscFree(bufj);CHKERRQ(ierr);
4381     if (!startsj || !bufa_ptr){
4382       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4383       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4384     } else {
4385       *startsj  = sstartsj;
4386       *bufa_ptr = bufa;
4387     }
4388   }
4389   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4390   PetscFunctionReturn(0);
4391 }
4392 
4393 #undef __FUNCT__
4394 #define __FUNCT__ "MatGetCommunicationStructs"
4395 /*@C
4396   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4397 
4398   Not Collective
4399 
4400   Input Parameters:
4401 . A - The matrix in mpiaij format
4402 
4403   Output Parameter:
4404 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4405 . colmap - A map from global column index to local index into lvec
4406 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4407 
4408   Level: developer
4409 
4410 @*/
4411 #if defined (PETSC_USE_CTABLE)
4412 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4413 #else
4414 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4415 #endif
4416 {
4417   Mat_MPIAIJ *a;
4418 
4419   PetscFunctionBegin;
4420   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4421   PetscValidPointer(lvec, 2)
4422   PetscValidPointer(colmap, 3)
4423   PetscValidPointer(multScatter, 4)
4424   a = (Mat_MPIAIJ *) A->data;
4425   if (lvec) *lvec = a->lvec;
4426   if (colmap) *colmap = a->colmap;
4427   if (multScatter) *multScatter = a->Mvctx;
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 EXTERN_C_BEGIN
4432 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4433 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4434 EXTERN_C_END
4435 
4436 /*MC
4437    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4438 
4439    Options Database Keys:
4440 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4441 
4442   Level: beginner
4443 
4444 .seealso: MatCreateMPIAIJ
4445 M*/
4446 
4447 EXTERN_C_BEGIN
4448 #undef __FUNCT__
4449 #define __FUNCT__ "MatCreate_MPIAIJ"
4450 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4451 {
4452   Mat_MPIAIJ     *b;
4453   PetscErrorCode ierr;
4454   PetscMPIInt    size;
4455 
4456   PetscFunctionBegin;
4457   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4458 
4459   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4460   B->data         = (void*)b;
4461   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4462   B->factor       = 0;
4463   B->rmap.bs      = 1;
4464   B->assembled    = PETSC_FALSE;
4465   B->mapping      = 0;
4466 
4467   B->insertmode      = NOT_SET_VALUES;
4468   b->size            = size;
4469   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4470 
4471   /* build cache for off array entries formed */
4472   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4473   b->donotstash  = PETSC_FALSE;
4474   b->colmap      = 0;
4475   b->garray      = 0;
4476   b->roworiented = PETSC_TRUE;
4477 
4478   /* stuff used for matrix vector multiply */
4479   b->lvec      = PETSC_NULL;
4480   b->Mvctx     = PETSC_NULL;
4481 
4482   /* stuff for MatGetRow() */
4483   b->rowindices   = 0;
4484   b->rowvalues    = 0;
4485   b->getrowactive = PETSC_FALSE;
4486 
4487 
4488   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4489                                      "MatStoreValues_MPIAIJ",
4490                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4492                                      "MatRetrieveValues_MPIAIJ",
4493                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4494   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4495 				     "MatGetDiagonalBlock_MPIAIJ",
4496                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4497   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4498 				     "MatIsTranspose_MPIAIJ",
4499 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4500   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4501 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4502 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4503   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4504 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4505 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4506   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4507 				     "MatDiagonalScaleLocal_MPIAIJ",
4508 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4509   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4510                                      "MatConvert_MPIAIJ_MPICSRPERM",
4511                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4512   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4513                                      "MatConvert_MPIAIJ_MPICRL",
4514                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4515   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4516   PetscFunctionReturn(0);
4517 }
4518 EXTERN_C_END
4519 
4520 #undef __FUNCT__
4521 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4522 /*@C
4523      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4524          and "off-diagonal" part of the matrix in CSR format.
4525 
4526    Collective on MPI_Comm
4527 
4528    Input Parameters:
4529 +  comm - MPI communicator
4530 .  m - number of local rows (Cannot be PETSC_DECIDE)
4531 .  n - This value should be the same as the local size used in creating the
4532        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4533        calculated if N is given) For square matrices n is almost always m.
4534 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4535 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4536 .   i - row indices for "diagonal" portion of matrix
4537 .   j - column indices
4538 .   a - matrix values
4539 .   oi - row indices for "off-diagonal" portion of matrix
4540 .   oj - column indices
4541 -   oa - matrix values
4542 
4543    Output Parameter:
4544 .   mat - the matrix
4545 
4546    Level: advanced
4547 
4548    Notes:
4549        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4550 
4551        The i and j indices are 0 based
4552 
4553        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4554 
4555 
4556 .keywords: matrix, aij, compressed row, sparse, parallel
4557 
4558 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4559           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4560 @*/
4561 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4562 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4563 {
4564   PetscErrorCode ierr;
4565   Mat_MPIAIJ     *maij;
4566 
4567  PetscFunctionBegin;
4568   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4569   if (i[0]) {
4570     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4571   }
4572   if (oi[0]) {
4573     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4574   }
4575   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4576   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4577   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4578   maij = (Mat_MPIAIJ*) (*mat)->data;
4579   maij->donotstash     = PETSC_TRUE;
4580   (*mat)->preallocated = PETSC_TRUE;
4581 
4582   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4583   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4584   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4585 
4586   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4587   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4588 
4589   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4590   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4591   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4592   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4593 
4594   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4595   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4596   PetscFunctionReturn(0);
4597 }
4598 
4599 /*
4600     Special version for direct calls from Fortran
4601 */
4602 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4603 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4604 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4605 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4606 #endif
4607 
4608 /* Change these macros so can be used in void function */
4609 #undef CHKERRQ
4610 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4611 #undef SETERRQ2
4612 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4613 #undef SETERRQ
4614 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4615 
4616 EXTERN_C_BEGIN
4617 #undef __FUNCT__
4618 #define __FUNCT__ "matsetvaluesmpiaij_"
4619 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4620 {
4621   Mat            mat = *mmat;
4622   PetscInt       m = *mm, n = *mn;
4623   InsertMode     addv = *maddv;
4624   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4625   PetscScalar    value;
4626   PetscErrorCode ierr;
4627 
4628   MatPreallocated(mat);
4629   if (mat->insertmode == NOT_SET_VALUES) {
4630     mat->insertmode = addv;
4631   }
4632 #if defined(PETSC_USE_DEBUG)
4633   else if (mat->insertmode != addv) {
4634     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4635   }
4636 #endif
4637   {
4638   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4639   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4640   PetscTruth     roworiented = aij->roworiented;
4641 
4642   /* Some Variables required in the macro */
4643   Mat            A = aij->A;
4644   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4645   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4646   PetscScalar    *aa = a->a;
4647   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4648   Mat            B = aij->B;
4649   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4650   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4651   PetscScalar    *ba = b->a;
4652 
4653   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4654   PetscInt       nonew = a->nonew;
4655   PetscScalar    *ap1,*ap2;
4656 
4657   PetscFunctionBegin;
4658   for (i=0; i<m; i++) {
4659     if (im[i] < 0) continue;
4660 #if defined(PETSC_USE_DEBUG)
4661     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4662 #endif
4663     if (im[i] >= rstart && im[i] < rend) {
4664       row      = im[i] - rstart;
4665       lastcol1 = -1;
4666       rp1      = aj + ai[row];
4667       ap1      = aa + ai[row];
4668       rmax1    = aimax[row];
4669       nrow1    = ailen[row];
4670       low1     = 0;
4671       high1    = nrow1;
4672       lastcol2 = -1;
4673       rp2      = bj + bi[row];
4674       ap2      = ba + bi[row];
4675       rmax2    = bimax[row];
4676       nrow2    = bilen[row];
4677       low2     = 0;
4678       high2    = nrow2;
4679 
4680       for (j=0; j<n; j++) {
4681         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4682         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4683         if (in[j] >= cstart && in[j] < cend){
4684           col = in[j] - cstart;
4685           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4686         } else if (in[j] < 0) continue;
4687 #if defined(PETSC_USE_DEBUG)
4688         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4689 #endif
4690         else {
4691           if (mat->was_assembled) {
4692             if (!aij->colmap) {
4693               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4694             }
4695 #if defined (PETSC_USE_CTABLE)
4696             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4697 	    col--;
4698 #else
4699             col = aij->colmap[in[j]] - 1;
4700 #endif
4701             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4702               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4703               col =  in[j];
4704               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4705               B = aij->B;
4706               b = (Mat_SeqAIJ*)B->data;
4707               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4708               rp2      = bj + bi[row];
4709               ap2      = ba + bi[row];
4710               rmax2    = bimax[row];
4711               nrow2    = bilen[row];
4712               low2     = 0;
4713               high2    = nrow2;
4714               bm       = aij->B->rmap.n;
4715               ba = b->a;
4716             }
4717           } else col = in[j];
4718           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4719         }
4720       }
4721     } else {
4722       if (!aij->donotstash) {
4723         if (roworiented) {
4724           if (ignorezeroentries && v[i*n] == 0.0) continue;
4725           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4726         } else {
4727           if (ignorezeroentries && v[i] == 0.0) continue;
4728           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4729         }
4730       }
4731     }
4732   }}
4733   PetscFunctionReturnVoid();
4734 }
4735 EXTERN_C_END
4736 
4737