/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 58cd72c3d0f0c5a63e84afc03ac54bb1bb84cd8d) */
#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
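
/*
   Note: colmap entries are stored shifted by one (i+1) so that a lookup
   result of zero can mean "global column not present in B"; callers
   subtract one after the lookup, see MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ().
*/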


#define CHUNKSIZE   15
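
/*
   Inserts (or adds, for ADD_VALUES) a single value into the diagonal
   block's CSR storage: a short binary search narrows the column range,
   a linear scan locates the slot, and any later entries in the row are
   shifted up when a new nonzero is created (reallocating through
   MatSeqXAIJReallocateAIJ() if the row is already full).
*/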
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value;   \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    }  \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


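/* Same insertion logic as MatSetValues_SeqAIJ_A_Private(), applied to the off-diagonal block B */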
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value;   \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    }  \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value;  \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

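
/*
   Assumes v[] holds the values of an entire locally owned row, ordered by
   increasing global column: the part of B to the left of the diagonal
   block, then all of A, then the remainder of B.
*/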
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

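
/*
   Entries for locally owned rows go directly into the diagonal block A or
   the off-diagonal block B (global columns are translated to local B
   columns through colmap); entries destined for other processes are
   stashed and communicated during MatAssemblyBegin()/MatAssemblyEnd().
*/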
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a; /* refresh ba from the new B before computing ap2 */
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}


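
/*
   Only values in locally owned rows may be retrieved; requested columns
   that fall outside the diagonal block are located in B via colmap, and
   positions with no stored entry return 0.0.
*/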
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

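
/*
   Checks that all processes are using the same InsertMode and starts
   communicating the stashed off-process entries to their owners.
*/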
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

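
/*
   Drains the stash (inserting the received off-process values), assembles
   the diagonal block, synchronizes disassembly state across all processes,
   and finally assembles the off-diagonal block.
*/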
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

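
/*
   Each process sends the global numbers of the rows to be zeroed to the
   owning processes; every process then zeros its own rows, optionally
   inserting diag on the diagonal when the diagonal block is square.
*/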
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

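
/*
   y = A*x: the scatter of the ghost values into lvec is overlapped with
   the multiply by the diagonal block; the off-diagonal block contribution
   is added once the scatter completes.
*/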
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

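
/*
   Writes the matrix in PETSc binary format: a header (MAT_FILE_COOKIE,
   global rows, global columns, total nonzeros) followed by all row
   lengths, all column indices, and all values, funneled through process 0.
*/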
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

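
/*
   Local relaxation only: each outer iteration scatters the current
   solution into lvec, forms bb1 = bb - B*lvec, and applies the requested
   SOR sweep of the diagonal block to bb1; true parallel SOR is not
   supported.
*/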
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->rmap.N;
  info->columns_global    = (double)matin->cmap.N;
  info->rows_local        = (double)matin->rmap.n;
  info->columns_local     = (double)matin->cmap.n;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

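
/*
   Returns one locally owned row with global column indices in increasing
   order, merging the (already sorted) diagonal and off-diagonal parts and
   mapping B's local columns through garray.
*/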
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatNorm_MPIAIJ"
1356 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1357 {
1358   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1359   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1360   PetscErrorCode ierr;
1361   PetscInt       i,j,cstart = mat->cmap.rstart;
1362   PetscReal      sum = 0.0;
1363   PetscScalar    *v;
1364 
1365   PetscFunctionBegin;
1366   if (aij->size == 1) {
1367     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1368   } else {
1369     if (type == NORM_FROBENIUS) {
1370       v = amat->a;
1371       for (i=0; i<amat->nz; i++) {
1372 #if defined(PETSC_USE_COMPLEX)
1373         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1374 #else
1375         sum += (*v)*(*v); v++;
1376 #endif
1377       }
1378       v = bmat->a;
1379       for (i=0; i<bmat->nz; i++) {
1380 #if defined(PETSC_USE_COMPLEX)
1381         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1382 #else
1383         sum += (*v)*(*v); v++;
1384 #endif
1385       }
1386       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1387       *norm = sqrt(*norm);
1388     } else if (type == NORM_1) { /* max column norm */
1389       PetscReal *tmp,*tmp2;
1390       PetscInt    *jj,*garray = aij->garray;
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1392       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1393       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1394       *norm = 0.0;
1395       v = amat->a; jj = amat->j;
1396       for (j=0; j<amat->nz; j++) {
1397         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1398       }
1399       v = bmat->a; jj = bmat->j;
1400       for (j=0; j<bmat->nz; j++) {
1401         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1402       }
1403       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1404       for (j=0; j<mat->cmap.N; j++) {
1405         if (tmp2[j] > *norm) *norm = tmp2[j];
1406       }
1407       ierr = PetscFree(tmp);CHKERRQ(ierr);
1408       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1409     } else if (type == NORM_INFINITY) { /* max row norm */
1410       PetscReal ntemp = 0.0;
1411       for (j=0; j<aij->A->rmap.n; j++) {
1412         v = amat->a + amat->i[j];
1413         sum = 0.0;
1414         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1415           sum += PetscAbsScalar(*v); v++;
1416         }
1417         v = bmat->a + bmat->i[j];
1418         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1419           sum += PetscAbsScalar(*v); v++;
1420         }
1421         if (sum > ntemp) ntemp = sum;
1422       }
1423       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1424     } else {
1425       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1426     }
1427   }
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatTranspose_MPIAIJ"
1433 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1434 {
1435   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1436   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1437   PetscErrorCode ierr;
1438   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1439   Mat            B;
1440   PetscScalar    *array;
1441 
1442   PetscFunctionBegin;
1443   if (!matout && M != N) {
1444     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1445   }
1446 
1447   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1448   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1449   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1450   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1451 
1452   /* copy over the A part */
1453   Aloc = (Mat_SeqAIJ*)a->A->data;
1454   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1455   row = A->rmap.rstart;
1456   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart;}
1457   for (i=0; i<m; i++) {
1458     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1459     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1460   }
1461   aj = Aloc->j;
1462   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart;}
1463 
1464   /* copy over the B part */
1465   Aloc = (Mat_SeqAIJ*)a->B->data;
1466   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1467   row  = A->rmap.rstart;
1468   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1469   ct   = cols;
1470   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1471   for (i=0; i<m; i++) {
1472     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1473     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1474   }
1475   ierr = PetscFree(ct);CHKERRQ(ierr);
1476   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1477   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1478   if (matout) {
1479     *matout = B;
1480   } else {
1481     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1482   }
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1488 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1489 {
1490   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1491   Mat            a = aij->A,b = aij->B;
1492   PetscErrorCode ierr;
1493   PetscInt       s1,s2,s3;
1494 
1495   PetscFunctionBegin;
1496   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1497   if (rr) {
1498     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1499     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1500     /* Overlap communication with computation. */
1501     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1502   }
1503   if (ll) {
1504     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1505     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1506     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1507   }
1508   /* scale the diagonal block */
1509   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1510 
1511   if (rr) {
1512     /* Do a scatter end and then right scale the off-diagonal block */
1513     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1514     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1515   }
1516 
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1522 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1523 {
1524   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1529   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }

1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1534 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1535 {
1536   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatEqual_MPIAIJ"
1546 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1547 {
1548   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1549   Mat            a,b,c,d;
1550   PetscTruth     flg;
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   a = matA->A; b = matA->B;
1555   c = matB->A; d = matB->B;
1556 
1557   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1558   if (flg) {
1559     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1560   }
1561   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatCopy_MPIAIJ"
1567 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1568 {
1569   PetscErrorCode ierr;
1570   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1571   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1572 
1573   PetscFunctionBegin;
1574   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1575   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1576     /* Because of the column compression in the off-processor part of the matrix a->B,
1577        the number of columns in a->B and b->B may differ, hence we cannot call
1578        MatCopy() directly on the two parts. If need be, we could provide a copy more
1579        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1580        then copying the submatrices */
1581     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1582   } else {
1583     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1584     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1585   }
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1591 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1592 {
1593   PetscErrorCode ierr;
1594 
1595   PetscFunctionBegin;
1596   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #include "petscblaslapack.h"
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatAXPY_MPIAIJ"
1603 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1604 {
1605   PetscErrorCode ierr;
1606   PetscInt       i;
1607   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1608   PetscBLASInt   bnz,one=1;
1609   Mat_SeqAIJ     *x,*y;
1610 
1611   PetscFunctionBegin;
1612   if (str == SAME_NONZERO_PATTERN) {
1613     PetscScalar alpha = a;
1614     x = (Mat_SeqAIJ *)xx->A->data;
1615     y = (Mat_SeqAIJ *)yy->A->data;
1616     bnz = (PetscBLASInt)x->nz;
1617     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1618     x = (Mat_SeqAIJ *)xx->B->data;
1619     y = (Mat_SeqAIJ *)yy->B->data;
1620     bnz = (PetscBLASInt)x->nz;
1621     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1622   } else if (str == SUBSET_NONZERO_PATTERN) {
1623     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1624 
1625     x = (Mat_SeqAIJ *)xx->B->data;
1626     y = (Mat_SeqAIJ *)yy->B->data;
1627     if (y->xtoy && y->XtoY != xx->B) {
1628       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1629       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1630     }
1631     if (!y->xtoy) { /* get xtoy */
1632       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1633       y->XtoY = xx->B;
1634     }
1635     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1636   } else {
1637     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1638   }
1639   PetscFunctionReturn(0);
1640 }
1641 
1642 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1643 
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatConjugate_MPIAIJ"
1646 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1647 {
1648 #if defined(PETSC_USE_COMPLEX)
1649   PetscErrorCode ierr;
1650   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1651 
1652   PetscFunctionBegin;
1653   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1654   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1655 #else
1656   PetscFunctionBegin;
1657 #endif
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatRealPart_MPIAIJ"
1663 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1664 {
1665   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1666   PetscErrorCode ierr;
1667 
1668   PetscFunctionBegin;
1669   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1670   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1676 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1677 {
1678   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1679   PetscErrorCode ierr;
1680 
1681   PetscFunctionBegin;
1682   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1683   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #ifdef PETSC_HAVE_PBGL
1688 
1689 #include <boost/parallel/mpi/bsp_process_group.hpp>
1690 #include <boost/graph/distributed/ilu_default_graph.hpp>
1691 #include <boost/graph/distributed/ilu_0_block.hpp>
1692 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1693 #include <boost/graph/distributed/petsc/interface.hpp>
1694 #include <boost/multi_array.hpp>
1695 #include <boost/parallel/distributed_property_map.hpp>
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1699 /*
1700   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1701 */
1702 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1703 {
1704   namespace petsc = boost::distributed::petsc;
1705 
1706   namespace graph_dist = boost::graph::distributed;
1707   using boost::graph::distributed::ilu_default::process_group_type;
1708   using boost::graph::ilu_permuted;
1709 
1710   PetscTruth           row_identity, col_identity;
1711   PetscObjectContainer c;
1712   PetscInt             m, n, M, N;
1713   PetscErrorCode       ierr;
1714 
1715   PetscFunctionBegin;
1716   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ILU");
1717   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1718   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1719   if (!row_identity || !col_identity) {
1720     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1721   }
1722 
1723   process_group_type pg;
1724   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1725   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1726   lgraph_type&   level_graph = *lgraph_p;
1727   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1728 
1729   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1730   ilu_permuted(level_graph);
1731 
1732   /* put together the new matrix */
1733   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1734   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1735   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1736   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1737   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1738   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1739   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1740   (*fact)->factor = FACTOR_LU;
1741 
1742   ierr = PetscObjectContainerCreate(A->comm, &c);CHKERRQ(ierr);
1743   ierr = PetscObjectContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1744   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1750 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1751 {
1752   PetscFunctionBegin;
1753   PetscFunctionReturn(0);
1754 }
1755 
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatSolve_MPIAIJ"
1758 /*
1759   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1760 */
1761 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1762 {
1763   namespace graph_dist = boost::graph::distributed;
1764 
1765   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1766   lgraph_type*   lgraph_p;
1767   PetscObjectContainer c;
1768   PetscErrorCode       ierr;
1769 
1770   PetscFunctionBegin;
1771   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1772   ierr = PetscObjectContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1773   ierr = VecCopy(b, x);CHKERRQ(ierr);
1774 
1775   PetscScalar* array_x;
1776   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1777   PetscInt sx;
1778   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1779 
1780   PetscScalar* array_b;
1781   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1782   PetscInt sb;
1783   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1784 
1785   lgraph_type&   level_graph = *lgraph_p;
1786   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1787 
1788   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1789   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1790                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1791 
1792   typedef boost::iterator_property_map<array_ref_type::iterator,
1793                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1794   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1795                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1796 
1797   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1798 
       /* restore the arrays obtained with VecGetArray() above */
       ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
       ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
1799   PetscFunctionReturn(0);
1800 }
1801 #endif
1802 
1803 /* -------------------------------------------------------------------*/
1804 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1805        MatGetRow_MPIAIJ,
1806        MatRestoreRow_MPIAIJ,
1807        MatMult_MPIAIJ,
1808 /* 4*/ MatMultAdd_MPIAIJ,
1809        MatMultTranspose_MPIAIJ,
1810        MatMultTransposeAdd_MPIAIJ,
1811 #ifdef PETSC_HAVE_PBGL
1812        MatSolve_MPIAIJ,
1813 #else
1814        0,
1815 #endif
1816        0,
1817        0,
1818 /*10*/ 0,
1819        0,
1820        0,
1821        MatRelax_MPIAIJ,
1822        MatTranspose_MPIAIJ,
1823 /*15*/ MatGetInfo_MPIAIJ,
1824        MatEqual_MPIAIJ,
1825        MatGetDiagonal_MPIAIJ,
1826        MatDiagonalScale_MPIAIJ,
1827        MatNorm_MPIAIJ,
1828 /*20*/ MatAssemblyBegin_MPIAIJ,
1829        MatAssemblyEnd_MPIAIJ,
1830        0,
1831        MatSetOption_MPIAIJ,
1832        MatZeroEntries_MPIAIJ,
1833 /*25*/ MatZeroRows_MPIAIJ,
1834        0,
1835 #ifdef PETSC_HAVE_PBGL
1836        MatLUFactorNumeric_MPIAIJ,
1837 #else
1838        0,
1839 #endif
1840        0,
1841        0,
1842 /*30*/ MatSetUpPreallocation_MPIAIJ,
1843 #ifdef PETSC_HAVE_PBGL
1844        MatILUFactorSymbolic_MPIAIJ,
1845 #else
1846        0,
1847 #endif
1848        0,
1849        0,
1850        0,
1851 /*35*/ MatDuplicate_MPIAIJ,
1852        0,
1853        0,
1854        0,
1855        0,
1856 /*40*/ MatAXPY_MPIAIJ,
1857        MatGetSubMatrices_MPIAIJ,
1858        MatIncreaseOverlap_MPIAIJ,
1859        MatGetValues_MPIAIJ,
1860        MatCopy_MPIAIJ,
1861 /*45*/ 0,
1862        MatScale_MPIAIJ,
1863        0,
1864        0,
1865        0,
1866 /*50*/ MatSetBlockSize_MPIAIJ,
1867        0,
1868        0,
1869        0,
1870        0,
1871 /*55*/ MatFDColoringCreate_MPIAIJ,
1872        0,
1873        MatSetUnfactored_MPIAIJ,
1874        MatPermute_MPIAIJ,
1875        0,
1876 /*60*/ MatGetSubMatrix_MPIAIJ,
1877        MatDestroy_MPIAIJ,
1878        MatView_MPIAIJ,
1879        0,
1880        0,
1881 /*65*/ 0,
1882        0,
1883        0,
1884        0,
1885        0,
1886 /*70*/ 0,
1887        0,
1888        MatSetColoring_MPIAIJ,
1889 #if defined(PETSC_HAVE_ADIC)
1890        MatSetValuesAdic_MPIAIJ,
1891 #else
1892        0,
1893 #endif
1894        MatSetValuesAdifor_MPIAIJ,
1895 /*75*/ 0,
1896        0,
1897        0,
1898        0,
1899        0,
1900 /*80*/ 0,
1901        0,
1902        0,
1903        0,
1904 /*84*/ MatLoad_MPIAIJ,
1905        0,
1906        0,
1907        0,
1908        0,
1909        0,
1910 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1911        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1912        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1913        MatPtAP_Basic,
1914        MatPtAPSymbolic_MPIAIJ,
1915 /*95*/ MatPtAPNumeric_MPIAIJ,
1916        0,
1917        0,
1918        0,
1919        0,
1920 /*100*/0,
1921        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1922        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1923        MatConjugate_MPIAIJ,
1924        0,
1925 /*105*/MatSetValuesRow_MPIAIJ,
1926        MatRealPart_MPIAIJ,
1927        MatImaginaryPart_MPIAIJ};
1928 
1929 /* ----------------------------------------------------------------------------------------*/
1930 
1931 EXTERN_C_BEGIN
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1934 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1935 {
1936   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1937   PetscErrorCode ierr;
1938 
1939   PetscFunctionBegin;
1940   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1941   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 EXTERN_C_END
1945 
1946 EXTERN_C_BEGIN
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1949 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1950 {
1951   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1952   PetscErrorCode ierr;
1953 
1954   PetscFunctionBegin;
1955   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1956   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1957   PetscFunctionReturn(0);
1958 }
1959 EXTERN_C_END
1960 
1961 #include "petscpc.h"
1962 EXTERN_C_BEGIN
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1965 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1966 {
1967   Mat_MPIAIJ     *b;
1968   PetscErrorCode ierr;
1969   PetscInt       i;
1970 
1971   PetscFunctionBegin;
1972   B->preallocated = PETSC_TRUE;
1973   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1974   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1975   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1976   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1977 
1978   B->rmap.bs = B->cmap.bs = 1;
1979   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1980   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1981   if (d_nnz) {
1982     for (i=0; i<B->rmap.n; i++) {
1983       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1984     }
1985   }
1986   if (o_nnz) {
1987     for (i=0; i<B->rmap.n; i++) {
1988       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1989     }
1990   }
1991   b = (Mat_MPIAIJ*)B->data;
1992 
1993   /* Explicitly create 2 MATSEQAIJ matrices. */
1994   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1995   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1996   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1997   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1998   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1999   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2000   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2001   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2002 
2003   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2004   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2005 
2006   PetscFunctionReturn(0);
2007 }
2008 EXTERN_C_END
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2012 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2013 {
2014   Mat            mat;
2015   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   *newmat       = 0;
2020   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2021   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2022   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2023   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2024   a    = (Mat_MPIAIJ*)mat->data;
2025 
2026   mat->factor       = matin->factor;
2027   mat->rmap.bs      = matin->rmap.bs;
2028   mat->assembled    = PETSC_TRUE;
2029   mat->insertmode   = NOT_SET_VALUES;
2030   mat->preallocated = PETSC_TRUE;
2031 
2032   a->size           = oldmat->size;
2033   a->rank           = oldmat->rank;
2034   a->donotstash     = oldmat->donotstash;
2035   a->roworiented    = oldmat->roworiented;
2036   a->rowindices     = 0;
2037   a->rowvalues      = 0;
2038   a->getrowactive   = PETSC_FALSE;
2039 
2040   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2041   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2042 
2043   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2044   if (oldmat->colmap) {
2045 #if defined (PETSC_USE_CTABLE)
2046     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2047 #else
2048     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2049     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2050     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2051 #endif
2052   } else a->colmap = 0;
2053   if (oldmat->garray) {
2054     PetscInt len;
2055     len  = oldmat->B->cmap.n;
2056     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2057     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2058     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2059   } else a->garray = 0;
2060 
2061   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2062   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2063   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2064   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2065   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2066   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2067   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2068   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2069   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2070   *newmat = mat;
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #include "petscsys.h"
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatLoad_MPIAIJ"
2078 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2079 {
2080   Mat            A;
2081   PetscScalar    *vals,*svals;
2082   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2083   MPI_Status     status;
2084   PetscErrorCode ierr;
2085   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2086   PetscInt       i,nz,j,rstart,rend,mmax;
2087   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2088   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2089   PetscInt       cend,cstart,n,*rowners;
2090   int            fd;
2091 
2092   PetscFunctionBegin;
2093   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2094   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2095   if (!rank) {
2096     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2097     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2098     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2099   }
2100 
2101   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2102   M = header[1]; N = header[2];
2103   /* determine ownership of all rows */
2104   m    = M/size + ((M % size) > rank);
2105   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2106   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2107 
2108   /* First process needs enough room for process with most rows */
2109   if (!rank) {
2110     mmax       = rowners[1];
2111     for (i=2; i<size; i++) {
2112       mmax = PetscMax(mmax,rowners[i]);
2113     }
2114   } else mmax = m;
2115 
2116   rowners[0] = 0;
2117   for (i=2; i<=size; i++) {
2118     mmax       = PetscMax(mmax,rowners[i]);
2119     rowners[i] += rowners[i-1];
2120   }
2121   rstart = rowners[rank];
2122   rend   = rowners[rank+1];
2123 
2124   /* distribute row lengths to all processors */
2125   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2126   if (!rank) {
2127     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2128     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2129     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2130     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2131     for (j=0; j<m; j++) {
2132       procsnz[0] += ourlens[j];
2133     }
2134     for (i=1; i<size; i++) {
2135       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2136       /* calculate the number of nonzeros on each processor */
2137       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2138         procsnz[i] += rowlengths[j];
2139       }
2140       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2141     }
2142     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2143   } else {
2144     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2145   }
2146 
2147   if (!rank) {
2148     /* determine max buffer needed and allocate it */
2149     maxnz = 0;
2150     for (i=0; i<size; i++) {
2151       maxnz = PetscMax(maxnz,procsnz[i]);
2152     }
2153     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2154 
2155     /* read in my part of the matrix column indices  */
2156     nz   = procsnz[0];
2157     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2158     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2159 
2160     /* read in everyone else's parts and ship them off */
2161     for (i=1; i<size; i++) {
2162       nz   = procsnz[i];
2163       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2164       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2165     }
2166     ierr = PetscFree(cols);CHKERRQ(ierr);
2167   } else {
2168     /* determine buffer space needed for message */
2169     nz = 0;
2170     for (i=0; i<m; i++) {
2171       nz += ourlens[i];
2172     }
2173     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2174 
2175     /* receive message of column indices */
2176     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2177     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2178     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2179   }
2180 
2181   /* determine column ownership if matrix is not square */
2182   if (N != M) {
2183     n      = N/size + ((N % size) > rank);
2184     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2185     cstart = cend - n;
2186   } else {
2187     cstart = rstart;
2188     cend   = rend;
2189     n      = cend - cstart;
2190   }
2191 
2192   /* loop over local rows, determining the number of off-diagonal entries */
2193   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2194   jj = 0;
2195   for (i=0; i<m; i++) {
2196     for (j=0; j<ourlens[i]; j++) {
2197       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2198       jj++;
2199     }
2200   }
2201 
2202   /* create our matrix */
2203   for (i=0; i<m; i++) {
2204     ourlens[i] -= offlens[i];
2205   }
2206   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2207   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2208   ierr = MatSetType(A,type);CHKERRQ(ierr);
2209   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2210 
2211   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2212   for (i=0; i<m; i++) {
2213     ourlens[i] += offlens[i];
2214   }
2215 
2216   if (!rank) {
2217     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2218 
2219     /* read in my part of the matrix numerical values  */
2220     nz   = procsnz[0];
2221     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2222 
2223     /* insert into matrix */
2224     jj      = rstart;
2225     smycols = mycols;
2226     svals   = vals;
2227     for (i=0; i<m; i++) {
2228       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2229       smycols += ourlens[i];
2230       svals   += ourlens[i];
2231       jj++;
2232     }
2233 
2234     /* read in the parts for the other processors and ship them out */
2235     for (i=1; i<size; i++) {
2236       nz   = procsnz[i];
2237       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2238       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2239     }
2240     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2241   } else {
2242     /* receive numeric values */
2243     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2244 
2245     /* receive message of values */
2246     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2247     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2248     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2249 
2250     /* insert into matrix */
2251     jj      = rstart;
2252     smycols = mycols;
2253     svals   = vals;
2254     for (i=0; i<m; i++) {
2255       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2256       smycols += ourlens[i];
2257       svals   += ourlens[i];
2258       jj++;
2259     }
2260   }
2261   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2262   ierr = PetscFree(vals);CHKERRQ(ierr);
2263   ierr = PetscFree(mycols);CHKERRQ(ierr);
2264   ierr = PetscFree(rowners);CHKERRQ(ierr);
2265 
2266   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2267   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2268   *newmat = A;
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2274 /*
2275     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
2276   locally, and then the end result by concatenating the local matrices.
2277   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2278 */
2279 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2280 {
2281   PetscErrorCode ierr;
2282   PetscMPIInt    rank,size;
2283   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2284   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2285   Mat            *local,M,Mreuse;
2286   PetscScalar    *vwork,*aa;
2287   MPI_Comm       comm = mat->comm;
2288   Mat_SeqAIJ     *aij;
2289 
2290 
2291   PetscFunctionBegin;
2292   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2293   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2294 
2295   if (call ==  MAT_REUSE_MATRIX) {
2296     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2297     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2298     local = &Mreuse;
2299     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2300   } else {
2301     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2302     Mreuse = *local;
2303     ierr   = PetscFree(local);CHKERRQ(ierr);
2304   }
2305 
2306   /*
2307       m - number of local rows
2308       n - number of columns (same on all processors)
2309       rstart - first row in new global matrix generated
2310   */
2311   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2312   if (call == MAT_INITIAL_MATRIX) {
2313     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2314     ii  = aij->i;
2315     jj  = aij->j;
2316 
2317     /*
2318         Determine the number of non-zeros in the diagonal and off-diagonal
2319         portions of the matrix in order to do correct preallocation
2320     */
2321 
2322     /* first get start and end of "diagonal" columns */
2323     if (csize == PETSC_DECIDE) {
2324       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2325       if (mglobal == n) { /* square matrix */
2326         nlocal = m;
2327       } else {
2328         nlocal = n/size + ((n % size) > rank);
2329       }
2330     } else {
2331       nlocal = csize;
2332     }
2333     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2334     rstart = rend - nlocal;
2335     if (rank == size - 1 && rend != n) {
2336       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2337     }
2338 
2339     /* next, compute all the lengths */
2340     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2341     olens = dlens + m;
2342     for (i=0; i<m; i++) {
2343       jend = ii[i+1] - ii[i];
2344       olen = 0;
2345       dlen = 0;
2346       for (j=0; j<jend; j++) {
2347         if (*jj < rstart || *jj >= rend) olen++;
2348         else dlen++;
2349         jj++;
2350       }
2351       olens[i] = olen;
2352       dlens[i] = dlen;
2353     }
2354     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2355     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2356     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2357     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2358     ierr = PetscFree(dlens);CHKERRQ(ierr);
2359   } else {
2360     PetscInt ml,nl;
2361 
2362     M = *newmat;
2363     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2364     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2365     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2366     /*
2367          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2368        rather than the slower MatSetValues().
2369     */
2370     M->was_assembled = PETSC_TRUE;
2371     M->assembled     = PETSC_FALSE;
2372   }
2373   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2374   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2375   ii  = aij->i;
2376   jj  = aij->j;
2377   aa  = aij->a;
2378   for (i=0; i<m; i++) {
2379     row   = rstart + i;
2380     nz    = ii[i+1] - ii[i];
2381     cwork = jj;     jj += nz;
2382     vwork = aa;     aa += nz;
2383     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2384   }
2385 
2386   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   *newmat = M;
2389 
2390   /* save the submatrix used on this processor for the next request */
2391   if (call ==  MAT_INITIAL_MATRIX) {
2392     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2393     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2394   }
2395 
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 EXTERN_C_BEGIN
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2402 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2403 {
2404   PetscInt       m,cstart, cend,j,nnz,i,d;
2405   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2406   const PetscInt *JJ;
2407   PetscScalar    *values;
2408   PetscErrorCode ierr;
2409 
2410   PetscFunctionBegin;
2411   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2412 
2413   B->rmap.bs = B->cmap.bs = 1;
2414   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2415   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2416   m      = B->rmap.n;
2417   cstart = B->cmap.rstart;
2418   cend   = B->cmap.rend;
2419   rstart = B->rmap.rstart;
2420 
2421   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2422   o_nnz = d_nnz + m;
2423 
2424   for (i=0; i<m; i++) {
2425     nnz     = Ii[i+1]- Ii[i];
2426     JJ      = J + Ii[i];
2427     nnz_max = PetscMax(nnz_max,nnz);
2428     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2429     for (j=0; j<nnz; j++) {
2430       if (*JJ >= cstart) break;
2431       JJ++;
2432     }
2433     d = 0;
2434     for (; j<nnz; j++) {
2435       if (*JJ++ >= cend) break;
2436       d++;
2437     }
2438     d_nnz[i] = d;
2439     o_nnz[i] = nnz - d;
2440   }
2441   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2442   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2443 
2444   if (v) values = (PetscScalar*)v;
2445   else {
2446     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2447     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2448   }
2449 
2450   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2451   for (i=0; i<m; i++) {
2452     ii   = i + rstart;
2453     nnz  = Ii[i+1]- Ii[i];
2454     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2455   }
2456   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2457   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2458   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2459 
2460   if (!v) {
2461     ierr = PetscFree(values);CHKERRQ(ierr);
2462   }
2463   PetscFunctionReturn(0);
2464 }
2465 EXTERN_C_END
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2469 /*@
2470    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2471    (the default parallel PETSc format).
2472 
2473    Collective on MPI_Comm
2474 
2475    Input Parameters:
2476 +  B - the matrix
2477 .  i - the indices into j for the start of each local row (starts with zero)
2478 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2479 -  v - optional values in the matrix
2480 
2481    Level: developer
2482 
2483    Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal
2484      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
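
   Example usage (a minimal sketch with made-up values; it assumes B has already been
   created with MatCreate(), MatSetSizes(), and MatSetType(B,MATMPIAIJ)):
.vb
     PetscInt    i[] = {0,2,3};        /* this process owns 2 rows; row 0 has 2 entries, row 1 has 1 */
     PetscInt    j[] = {0,5,3};        /* global column indices, sorted within each row */
     PetscScalar v[] = {1.0,2.0,3.0};  /* one value per entry of j[] */
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve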
2485 
2486 .keywords: matrix, aij, compressed row, sparse, parallel
2487 
2488 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2489           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2490 @*/
2491 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2492 {
2493   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2494 
2495   PetscFunctionBegin;
2496   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2497   if (f) {
2498     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2499   }
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 #undef __FUNCT__
2504 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2505 /*@C
2506    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2507    (the default parallel PETSc format).  For good matrix assembly performance
2508    the user should preallocate the matrix storage by setting the parameters
2509    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2510    performance can be increased by more than a factor of 50.
2511 
2512    Collective on MPI_Comm
2513 
2514    Input Parameters:
2515 +  A - the matrix
2516 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2517            (same value is used for all local rows)
2518 .  d_nnz - array containing the number of nonzeros in the various rows of the
2519            DIAGONAL portion of the local submatrix (possibly different for each row)
2520            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2521            The size of this array is equal to the number of local rows, i.e 'm'.
2522            You must leave room for the diagonal entry even if it is zero.
2523 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2524            submatrix (same value is used for all local rows).
2525 -  o_nnz - array containing the number of nonzeros in the various rows of the
2526            OFF-DIAGONAL portion of the local submatrix (possibly different for
2527            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2528            structure. The size of this array is equal to the number
2529            of local rows, i.e 'm'.
2530 
2531    If the *_nnz parameter is given then the *_nz parameter is ignored
2532 
2533    The AIJ format (also called the Yale sparse matrix format or
2534    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2535    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2536 
2537    The parallel matrix is partitioned such that the first m0 rows belong to
2538    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2539    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2540 
2541    The DIAGONAL portion of the local submatrix of a processor can be defined
2542    as the submatrix which is obtained by extracting the part corresponding
2543    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2544    first row that belongs to the processor, and r2 is the last row belonging
2545    to this processor. This is a square mxm matrix. The remaining portion
2546    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2547 
2548    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2549 
2550    Example usage:
2551 
2552    Consider the following 8x8 matrix with 34 non-zero values that is
2553    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2554    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2555    as follows:
2556 
2557 .vb
2558             1  2  0  |  0  3  0  |  0  4
2559     Proc0   0  5  6  |  7  0  0  |  8  0
2560             9  0 10  | 11  0  0  | 12  0
2561     -------------------------------------
2562            13  0 14  | 15 16 17  |  0  0
2563     Proc1   0 18  0  | 19 20 21  |  0  0
2564             0  0  0  | 22 23  0  | 24  0
2565     -------------------------------------
2566     Proc2  25 26 27  |  0  0 28  | 29  0
2567            30  0  0  | 31 32 33  |  0 34
2568 .ve
2569 
2570    This can be represented as a collection of submatrices as:
2571 
2572 .vb
2573       A B C
2574       D E F
2575       G H I
2576 .ve
2577 
2578    Where the submatrices A,B,C are owned by proc0, D,E,F are
2579    owned by proc1, G,H,I are owned by proc2.
2580 
2581    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2582    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2583    The 'M','N' parameters are 8,8, and have the same values on all procs.
2584 
2585    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2586    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2587    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2588    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2589    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2590    matrix, and [DF] as another SeqAIJ matrix.
2591 
2592    When d_nz, o_nz parameters are specified, d_nz storage elements are
2593    allocated for every row of the local diagonal submatrix, and o_nz
2594    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2595    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2596    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2597    In this case, the values of d_nz,o_nz are:
2598 .vb
2599      proc0 : d_nz = 2, o_nz = 2
2600      proc1 : d_nz = 3, o_nz = 2
2601      proc2 : d_nz = 1, o_nz = 4
2602 .ve
2603    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2604    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2605    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2606    34 values.
2607 
2608    When d_nnz, o_nnz parameters are specified, the storage is specified
2609    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2610    In the above case the values for d_nnz,o_nnz are:
2611 .vb
2612      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2613      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2614      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2615 .ve
2616    Here the space allocated is the sum of all the above values, i.e., 34, and
2617    hence the preallocation is perfect.
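
   A typical calling sequence is sketched below (the per-row counts 5 and 2 are
   illustrative, not recommendations):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve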
2618 
2619    Level: intermediate
2620 
2621 .keywords: matrix, aij, compressed row, sparse, parallel
2622 
2623 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2624           MPIAIJ
2625 @*/
2626 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2627 {
2628   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2629 
2630   PetscFunctionBegin;
2631   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2632   if (f) {
2633     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2634   }
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 #undef __FUNCT__
2639 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
2640 /*@C
2641      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
2642          local rows in standard CSR format.
2643 
2644    Collective on MPI_Comm
2645 
2646    Input Parameters:
2647 +  comm - MPI communicator
2648 .  m - number of local rows (Cannot be PETSC_DECIDE)
2649 .  n - This value should be the same as the local size used in creating the
2650        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2651        calculated if N is given) For square matrices n is almost always m.
2652 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2653 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2654 .   i - row indices
2655 .   j - column indices
2656 -   a - matrix values
2657 
2658    Output Parameter:
2659 .   mat - the matrix
2660 
2661    Level: intermediate
2662 
2663    Notes:
2664        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2665      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2666      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2667 
2668        The i and j indices are 0-based.
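
   Example (a sketch with illustrative sizes; each process passes only the rows it owns):
.vb
     Mat         A;
     PetscInt    i[] = {0,2,3};        /* 2 local rows holding 3 nonzeros in total */
     PetscInt    j[] = {0,5,3};        /* 0-based global column indices */
     PetscScalar a[] = {1.0,2.0,3.0};
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,6,i,j,a,&A);CHKERRQ(ierr);
.ve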
2669 
2670 .keywords: matrix, aij, compressed row, sparse, parallel
2671 
2672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2673           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
2674 @*/
2675 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2676 {
2677   PetscErrorCode ierr;
2678 
2679   PetscFunctionBegin;
2680   if (i[0]) {
2681     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2682   }
2683   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2684   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
       ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2685   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
2686   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatCreateMPIAIJ"
2692 /*@C
2693    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2694    (the default parallel PETSc format).  For good matrix assembly performance
2695    the user should preallocate the matrix storage by setting the parameters
2696    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2697    performance can be increased by more than a factor of 50.
2698 
2699    Collective on MPI_Comm
2700 
2701    Input Parameters:
2702 +  comm - MPI communicator
2703 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2704            This value should be the same as the local size used in creating the
2705            y vector for the matrix-vector product y = Ax.
2706 .  n - This value should be the same as the local size used in creating the
2707        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2708        calculated if N is given) For square matrices n is almost always m.
2709 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2710 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2711 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2712            (same value is used for all local rows)
2713 .  d_nnz - array containing the number of nonzeros in the various rows of the
2714            DIAGONAL portion of the local submatrix (possibly different for each row)
2715            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2716            The size of this array is equal to the number of local rows, i.e 'm'.
2717            You must leave room for the diagonal entry even if it is zero.
2718 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2719            submatrix (same value is used for all local rows).
2720 -  o_nnz - array containing the number of nonzeros in the various rows of the
2721            OFF-DIAGONAL portion of the local submatrix (possibly different for
2722            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2723            structure. The size of this array is equal to the number
2724            of local rows, i.e 'm'.
2725 
2726    Output Parameter:
2727 .  A - the matrix
2728 
2729    Notes:
2730    If the *_nnz parameter is given then the *_nz parameter is ignored
2731 
2732    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2733    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2734    storage requirements for this matrix.
2735 
2736    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2737    processor then it must be used on all processors that share the object for
2738    that argument.
2739 
2740    The user MUST specify either the local or global matrix dimensions
2741    (possibly both).
2742 
2743    The parallel matrix is partitioned such that the first m0 rows belong to
2744    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2745    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2746 
2747    The DIAGONAL portion of the local submatrix of a processor can be defined
2748    as the submatrix which is obtained by extracting the part corresponding
2749    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2750    first row that belongs to the processor, and r2 is the last row belonging
2751    to this processor. This is a square mxm matrix. The remaining portion
2752    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2753 
2754    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2755 
2756    When calling this routine with a single process communicator, a matrix of
2757    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2758    type of communicator, use the construction mechanism:
2759      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2760 
2761    By default, this format uses inodes (identical nodes) when possible.
2762    We search for consecutive rows with the same nonzero structure, thereby
2763    reusing matrix information to achieve increased efficiency.
2764 
2765    Options Database Keys:
2766 +  -mat_no_inode  - Do not use inodes
2767 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2768 -  -mat_aij_oneindex - Internally use indexing starting at 1
2769         rather than 0.  Note that when calling MatSetValues(),
2770         the user still MUST index entries starting at 0!
2771 
2772 
2773    Example usage:
2774 
2775    Consider the following 8x8 matrix with 34 nonzero values that is
2776    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
2777    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2778    as follows:
2779 
2780 .vb
2781             1  2  0  |  0  3  0  |  0  4
2782     Proc0   0  5  6  |  7  0  0  |  8  0
2783             9  0 10  | 11  0  0  | 12  0
2784     -------------------------------------
2785            13  0 14  | 15 16 17  |  0  0
2786     Proc1   0 18  0  | 19 20 21  |  0  0
2787             0  0  0  | 22 23  0  | 24  0
2788     -------------------------------------
2789     Proc2  25 26 27  |  0  0 28  | 29  0
2790            30  0  0  | 31 32 33  |  0 34
2791 .ve
2792 
2793    This can be represented as a collection of submatrices as:
2794 
2795 .vb
2796       A B C
2797       D E F
2798       G H I
2799 .ve
2800 
2801    Here the submatrices A,B,C are owned by proc0, D,E,F are
2802    owned by proc1, and G,H,I are owned by proc2.
2803 
2804    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2805    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2806    The 'M','N' parameters are 8,8, and have the same values on all procs.
2807 
2808    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2809    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2810    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2811    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2812    part as separate SeqAIJ matrices; for example, proc1 will store [E] as one
2813    SeqAIJ matrix and [DF] as another SeqAIJ matrix.
2814 
2815    When d_nz, o_nz parameters are specified, d_nz storage elements are
2816    allocated for every row of the local diagonal submatrix, and o_nz
2817    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2818    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2819    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2820    In this case, the values of d_nz,o_nz are:
2821 .vb
2822      proc0 : d_nz = 2, o_nz = 2
2823      proc1 : d_nz = 3, o_nz = 2
2824      proc2 : d_nz = 1, o_nz = 4
2825 .ve
2826    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2827    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2828    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2829    34 values.
2830 
2831    When d_nnz, o_nnz parameters are specified, the storage is specified
2832    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2833    In the above case the values for d_nnz,o_nnz are:
2834 .vb
2835      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2836      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2837      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2838 .ve
2839    Here the space allocated is sum of all the above values i.e 34, and
2840    hence pre-allocation is perfect.
2841 
2842    Level: intermediate
2843 
2844 .keywords: matrix, aij, compressed row, sparse, parallel
2845 
2846 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2847           MPIAIJ, MatCreateMPIAIJWithArrays()
2848 @*/
2849 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2850 {
2851   PetscErrorCode ierr;
2852   PetscMPIInt    size;
2853 
2854   PetscFunctionBegin;
2855   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2856   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2857   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2858   if (size > 1) {
2859     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2860     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2861   } else {
2862     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2863     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2864   }
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
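/*
   MatMPIAIJGetSeqAIJ - gives access to the diagonal block Ad, the off-diagonal
   block Ao, and the map colmap from the local columns of Ao to global column
   numbers of an MPIAIJ matrix; no copies are made, so the caller should not
   destroy Ad or Ao.
*/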
2870 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2871 {
2872   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2873 
2874   PetscFunctionBegin;
2875   *Ad     = a->A;
2876   *Ao     = a->B;
2877   *colmap = a->garray;
2878   PetscFunctionReturn(0);
2879 }
2880 
2881 #undef __FUNCT__
2882 #define __FUNCT__ "MatSetColoring_MPIAIJ"
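/*
   MatSetColoring_MPIAIJ - applies a coloring to both SeqAIJ blocks of an MPIAIJ
   matrix; for IS_COLORING_GLOBAL the colors of all columns are first gathered on
   every process, while for IS_COLORING_GHOSTED they are looked up through A->mapping.
*/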
2883 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2884 {
2885   PetscErrorCode ierr;
2886   PetscInt       i;
2887   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2888 
2889   PetscFunctionBegin;
2890   if (coloring->ctype == IS_COLORING_GLOBAL) {
2891     ISColoringValue *allcolors,*colors;
2892     ISColoring      ocoloring;
2893 
2894     /* set coloring for diagonal portion */
2895     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2896 
2897     /* set coloring for off-diagonal portion */
2898     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2899     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2900     for (i=0; i<a->B->cmap.n; i++) {
2901       colors[i] = allcolors[a->garray[i]];
2902     }
2903     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2904     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2905     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2906     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2907   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2908     ISColoringValue *colors;
2909     PetscInt        *larray;
2910     ISColoring      ocoloring;
2911 
2912     /* set coloring for diagonal portion */
2913     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2914     for (i=0; i<a->A->cmap.n; i++) {
2915       larray[i] = i + A->cmap.rstart;
2916     }
2917     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2918     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2919     for (i=0; i<a->A->cmap.n; i++) {
2920       colors[i] = coloring->colors[larray[i]];
2921     }
2922     ierr = PetscFree(larray);CHKERRQ(ierr);
2923     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2924     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2925     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2926 
2927     /* set coloring for off-diagonal portion */
2928     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2929     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2930     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2931     for (i=0; i<a->B->cmap.n; i++) {
2932       colors[i] = coloring->colors[larray[i]];
2933     }
2934     ierr = PetscFree(larray);CHKERRQ(ierr);
2935     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2936     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2937     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2938   } else {
2939     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
2940   }
2941 
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 #if defined(PETSC_HAVE_ADIC)
2946 #undef __FUNCT__
2947 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2948 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2949 {
2950   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2951   PetscErrorCode ierr;
2952 
2953   PetscFunctionBegin;
2954   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2955   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2956   PetscFunctionReturn(0);
2957 }
2958 #endif
2959 
2960 #undef __FUNCT__
2961 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2962 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2963 {
2964   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2965   PetscErrorCode ierr;
2966 
2967   PetscFunctionBegin;
2968   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2969   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatMerge"
2975 /*@C
2976       MatMerge - Creates a single large parallel PETSc matrix by concatenating sequential
2977                  matrices from each processor
2978 
2979     Collective on MPI_Comm
2980 
2981    Input Parameters:
2982 +    comm - the communicator the parallel matrix will live on
2983 .    inmat - the input sequential matrix (one per process)
2984 .    n - number of local columns (or PETSC_DECIDE)
2985 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2986 
2987    Output Parameter:
2988 .    outmat - the parallel matrix generated
2989 
2990     Level: advanced
2991 
2992    Notes: The number of columns of the matrix in EACH processor MUST be the same.
        The input matrix inmat is destroyed inside this routine; use only outmat afterwards.
2993 
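   Example usage (a sketch; Aloc is a sequential matrix each process has
   already assembled on PETSC_COMM_SELF):
.vb
   Mat Apar;
   ierr = MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
   /* Aloc has been destroyed by MatMerge(); only Apar is valid from here on */
.ve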
2994 @*/
2995 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2996 {
2997   PetscErrorCode ierr;
2998   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
2999   PetscInt       *indx;
3000   PetscScalar    *values;
3001 
3002   PetscFunctionBegin;
3003   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3004   if (scall == MAT_INITIAL_MATRIX){
3005     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3006     if (n == PETSC_DECIDE){
3007       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3008     }
3009     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3010     rstart -= m;
3011 
3012     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3013     for (i=0;i<m;i++) {
3014       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3015       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3016       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3017     }
3018     /* This routine will ONLY return a matrix of type MPIAIJ */
3019     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3020     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3021     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3022     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3023     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3024 
3025   } else if (scall == MAT_REUSE_MATRIX){
3026     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3027   } else {
3028     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3029   }
3030 
3031   for (i=0;i<m;i++) {
3032     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3033     Ii    = i + rstart;
3034     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3035     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3036   }
3037   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3038   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3039   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3040 
3041   PetscFunctionReturn(0);
3042 }
3043 
3044 #undef __FUNCT__
3045 #define __FUNCT__ "MatFileSplit"
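/*
   MatFileSplit - writes each process's local rows of A, as an independent
   sequential matrix, to the binary file <outfile>.<rank>.
*/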
3046 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3047 {
3048   PetscErrorCode    ierr;
3049   PetscMPIInt       rank;
3050   PetscInt          m,N,i,rstart,nnz;
3051   size_t            len;
3052   const PetscInt    *indx;
3053   PetscViewer       out;
3054   char              *name;
3055   Mat               B;
3056   const PetscScalar *values;
3057 
3058   PetscFunctionBegin;
3059   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3060   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3061   /* Should this be the type of the diagonal block of A? */
3062   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3063   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3064   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3065   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3066   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3067   for (i=0;i<m;i++) {
3068     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3069     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3070     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3071   }
3072   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3073   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3074 
3075   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3076   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3077   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3078   sprintf(name,"%s.%d",outfile,rank);
3079   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3080   ierr = PetscFree(name);CHKERRQ(ierr);
3081   ierr = MatView(B,out);CHKERRQ(ierr);
3082   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3083   ierr = MatDestroy(B);CHKERRQ(ierr);
3084   PetscFunctionReturn(0);
3085 }
3086 
3087 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3088 #undef __FUNCT__
3089 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
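/*
   MatDestroy_MPIAIJ_SeqsToMPI - frees the Mat_Merge_SeqsToMPI support structure
   attached by MatMerge_SeqsToMPISymbolic(), then performs the usual MPIAIJ destroy.
*/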
3090 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3091 {
3092   PetscErrorCode       ierr;
3093   Mat_Merge_SeqsToMPI  *merge;
3094   PetscObjectContainer container;
3095 
3096   PetscFunctionBegin;
3097   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3098   if (container) {
3099     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3100     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3101     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3102     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3103     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3104     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3105     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3106     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3107     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3108     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3109     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3110     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3111 
3112     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
3113     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3114     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container was found */
3115   }
3116 
3117   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3118   PetscFunctionReturn(0);
3119 }
3120 
3121 #include "src/mat/utils/freespace.h"
3122 #include "petscbt.h"
3123 static PetscEvent logkey_seqstompinum = 0;
3124 #undef __FUNCT__
3125 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3126 /*@C
3127       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3128                  matrices from each processor
3129 
3130     Collective on MPI_Comm
3131 
3132    Input Parameters:
3133 +    comm - the communicator the parallel matrix will live on
3134 .    seqmat - the input sequential matrix (one per process)
3135 .    m - number of local rows (or PETSC_DECIDE)
3136 .    n - number of local columns (or PETSC_DECIDE)
3137 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3138 
3139    Output Parameter:
3140 .    mpimat - the parallel matrix generated
3141 
3142     Level: advanced
3143 
3144    Notes:
3145      The dimensions of the sequential matrix in each processor MUST be the same.
3146      The input seqmat is kept in the container "MatMergeSeqsToMPI", and will be
3147      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
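
     A sketch of the reuse pattern (the first call builds the symbolic
     structure, later calls redo only the numeric phase):
.vb
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve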
3148 @*/
3149 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3150 {
3151   PetscErrorCode       ierr;
3152   MPI_Comm             comm=mpimat->comm;
3153   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3154   PetscMPIInt          size,rank,taga,*len_s;
3155   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3156   PetscInt             proc,m;
3157   PetscInt             **buf_ri,**buf_rj;
3158   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3159   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3160   MPI_Request          *s_waits,*r_waits;
3161   MPI_Status           *status;
3162   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3163   Mat_Merge_SeqsToMPI  *merge;
3164   PetscObjectContainer container;
3165 
3166   PetscFunctionBegin;
3167   if (!logkey_seqstompinum) {
3168     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
3169   }
3170   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3171 
3172   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3173   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3174 
3175   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3176   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have the container attached by MatMerge_SeqsToMPISymbolic()");
3177   ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3178 
3179   bi     = merge->bi;
3180   bj     = merge->bj;
3181   buf_ri = merge->buf_ri;
3182   buf_rj = merge->buf_rj;
3183 
3184   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3185   owners = merge->rowmap.range;
3186   len_s  = merge->len_s;
3187 
3188   /* send and recv matrix values */
3189   /*-----------------------------*/
3190   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3191   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3192 
3193   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3194   for (proc=0,k=0; proc<size; proc++){
3195     if (!len_s[proc]) continue;
3196     i = owners[proc];
3197     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3198     k++;
3199   }
3200 
3201   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3202   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3203   ierr = PetscFree(status);CHKERRQ(ierr);
3204 
3205   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3206   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3207 
3208   /* insert mat values of mpimat */
3209   /*----------------------------*/
3210   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3211   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3212   nextrow = buf_ri_k + merge->nrecv;
3213   nextai  = nextrow + merge->nrecv;
3214 
3215   for (k=0; k<merge->nrecv; k++){
3216     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3217     nrows = *(buf_ri_k[k]);
3218     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3219     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */
3220   }
3221 
3222   /* set values of ba */
3223   m = merge->rowmap.n;
3224   for (i=0; i<m; i++) {
3225     arow = owners[rank] + i;
3226     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3227     bnzi = bi[i+1] - bi[i];
3228     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3229 
3230     /* add local non-zero vals of this proc's seqmat into ba */
3231     anzi = ai[arow+1] - ai[arow];
3232     aj   = a->j + ai[arow];
3233     aa   = a->a + ai[arow];
3234     nextaj = 0;
3235     for (j=0; nextaj<anzi; j++){
3236       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3237         ba_i[j] += aa[nextaj++];
3238       }
3239     }
3240 
3241     /* add received vals into ba */
3242     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3243       /* i-th row */
3244       if (i == *nextrow[k]) {
3245         anzi = *(nextai[k]+1) - *nextai[k];
3246         aj   = buf_rj[k] + *(nextai[k]);
3247         aa   = abuf_r[k] + *(nextai[k]);
3248         nextaj = 0;
3249         for (j=0; nextaj<anzi; j++){
3250           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3251             ba_i[j] += aa[nextaj++];
3252           }
3253         }
3254         nextrow[k]++; nextai[k]++;
3255       }
3256     }
3257     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3258   }
3259   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3260   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3261 
3262   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3263   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3264   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3265   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3266   PetscFunctionReturn(0);
3267 }
3268 
3269 static PetscEvent logkey_seqstompisym = 0;
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
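/*
   MatMerge_SeqsToMPISymbolic - computes the nonzero structure of the merged parallel
   matrix and caches the ij-structure and communication pattern in a Mat_Merge_SeqsToMPI
   container attached to the result; MatMerge_SeqsToMPINumeric() then fills in the
   numerical values and assembles the matrix.
*/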
3272 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3273 {
3274   PetscErrorCode       ierr;
3275   Mat                  B_mpi;
3276   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3277   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3278   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3279   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3280   PetscInt             len,proc,*dnz,*onz;
3281   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3282   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3283   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3284   MPI_Status           *status;
3285   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3286   PetscBT              lnkbt;
3287   Mat_Merge_SeqsToMPI  *merge;
3288   PetscObjectContainer container;
3289 
3290   PetscFunctionBegin;
3291   if (!logkey_seqstompisym) {
3292     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3293   }
3294   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3295 
3296   /* make sure it is a PETSc comm */
3297   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3298   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3299   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3300 
3301   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3302   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3303 
3304   /* determine row ownership */
3305   /*---------------------------------------------------------*/
3306   merge->rowmap.n = m;
3307   merge->rowmap.N = M;
3308   merge->rowmap.bs = 1;
3309   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3310   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3311   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3312 
3313   m      = merge->rowmap.n;
3314   M      = merge->rowmap.N;
3315   owners = merge->rowmap.range;
3316 
3317   /* determine the number of messages to send, their lengths */
3318   /*---------------------------------------------------------*/
3319   len_s  = merge->len_s;
3320 
3321   len = 0;  /* length of buf_si[] */
3322   merge->nsend = 0;
3323   for (proc=0; proc<size; proc++){
3324     len_si[proc] = 0;
3325     if (proc == rank){
3326       len_s[proc] = 0;
3327     } else {
3328       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3329       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3330     }
3331     if (len_s[proc]) {
3332       merge->nsend++;
3333       nrows = 0;
3334       for (i=owners[proc]; i<owners[proc+1]; i++){
3335         if (ai[i+1] > ai[i]) nrows++;
3336       }
3337       len_si[proc] = 2*(nrows+1);
3338       len += len_si[proc];
3339     }
3340   }
3341 
3342   /* determine the number and length of messages to receive for ij-structure */
3343   /*-------------------------------------------------------------------------*/
3344   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3345   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3346 
3347   /* post the Irecv of j-structure */
3348   /*-------------------------------*/
3349   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3350   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3351 
3352   /* post the Isend of j-structure */
3353   /*--------------------------------*/
3354   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3355   sj_waits = si_waits + merge->nsend;
3356 
3357   for (proc=0, k=0; proc<size; proc++){
3358     if (!len_s[proc]) continue;
3359     i = owners[proc];
3360     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3361     k++;
3362   }
3363 
3364   /* receives and sends of j-structure are complete */
3365   /*------------------------------------------------*/
3366   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3367   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3368 
3369   /* send and recv i-structure */
3370   /*---------------------------*/
3371   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3372   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3373 
3374   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3375   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3376   for (proc=0,k=0; proc<size; proc++){
3377     if (!len_s[proc]) continue;
3378     /* form outgoing message for i-structure:
3379          buf_si[0]:                 nrows to be sent
3380                [1:nrows]:           row index (global)
3381                [nrows+1:2*nrows+1]: i-structure index
3382     */
3383     /*-------------------------------------------*/
3384     nrows = len_si[proc]/2 - 1;
3385     buf_si_i    = buf_si + nrows+1;
3386     buf_si[0]   = nrows;
3387     buf_si_i[0] = 0;
3388     nrows = 0;
3389     for (i=owners[proc]; i<owners[proc+1]; i++){
3390       anzi = ai[i+1] - ai[i];
3391       if (anzi) {
3392         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3393         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3394         nrows++;
3395       }
3396     }
3397     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3398     k++;
3399     buf_si += len_si[proc];
3400   }
3401 
3402   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3403   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3404 
3405   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3406   for (i=0; i<merge->nrecv; i++){
3407     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3408   }
3409 
3410   ierr = PetscFree(len_si);CHKERRQ(ierr);
3411   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3412   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3413   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3414   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3415   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3416   ierr = PetscFree(status);CHKERRQ(ierr);
3417 
3418   /* compute a local seq matrix in each processor */
3419   /*----------------------------------------------*/
3420   /* allocate bi array and free space for accumulating nonzero column info */
3421   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3422   bi[0] = 0;
3423 
3424   /* create and initialize a linked list */
3425   nlnk = N+1;
3426   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3427 
3428   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3429   len  = ai[owners[rank+1]] - ai[owners[rank]];
3431   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3432   current_space = free_space;
3433 
3434   /* determine symbolic info for each local row */
3435   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3436   nextrow = buf_ri_k + merge->nrecv;
3437   nextai  = nextrow + merge->nrecv;
3438   for (k=0; k<merge->nrecv; k++){
3439     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3440     nrows = *buf_ri_k[k];
3441     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3442     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */
3443   }
3444 
3445   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3446   len = 0;
3447   for (i=0;i<m;i++) {
3448     bnzi   = 0;
3449     /* add local non-zero cols of this proc's seqmat into lnk */
3450     arow   = owners[rank] + i;
3451     anzi   = ai[arow+1] - ai[arow];
3452     aj     = a->j + ai[arow];
3453     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3454     bnzi += nlnk;
3455     /* add received col data into lnk */
3456     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3457       if (i == *nextrow[k]) { /* i-th row */
3458         anzi = *(nextai[k]+1) - *nextai[k];
3459         aj   = buf_rj[k] + *nextai[k];
3460         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3461         bnzi += nlnk;
3462         nextrow[k]++; nextai[k]++;
3463       }
3464     }
3465     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3466 
3467     /* if free space is not available, make more free space */
3468     if (current_space->local_remaining<bnzi) {
3469       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3470       nspacedouble++;
3471     }
3472     /* copy data into free space, then initialize lnk */
3473     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3474     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3475 
3476     current_space->array           += bnzi;
3477     current_space->local_used      += bnzi;
3478     current_space->local_remaining -= bnzi;
3479 
3480     bi[i+1] = bi[i] + bnzi;
3481   }
3482 
3483   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3484 
3485   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3486   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3487   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3488 
3489   /* create symbolic parallel matrix B_mpi */
3490   /*---------------------------------------*/
3491   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3492   if (n==PETSC_DECIDE) {
3493     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3494   } else {
3495     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3496   }
3497   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3498   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3499   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3500 
3501   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3502   B_mpi->assembled     = PETSC_FALSE;
3503   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3504   merge->bi            = bi;
3505   merge->bj            = bj;
3506   merge->buf_ri        = buf_ri;
3507   merge->buf_rj        = buf_rj;
3508   merge->coi           = PETSC_NULL;
3509   merge->coj           = PETSC_NULL;
3510   merge->owners_co     = PETSC_NULL;
3511 
3512   /* attach the supporting struct to B_mpi for reuse */
3513   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3514   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3515   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3516   *mpimat = B_mpi;
3517 
3518   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3519   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3520   PetscFunctionReturn(0);
3521 }
3522 
3523 static PetscEvent logkey_seqstompi = 0;
3524 #undef __FUNCT__
3525 #define __FUNCT__ "MatMerge_SeqsToMPI"
3526 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3527 {
3528   PetscErrorCode   ierr;
3529 
3530   PetscFunctionBegin;
3531   if (!logkey_seqstompi) {
3532     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3533   }
3534   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3535   if (scall == MAT_INITIAL_MATRIX){
3536     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3537   }
3538   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3539   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3540   PetscFunctionReturn(0);
3541 }
3542 static PetscEvent logkey_getlocalmat = 0;
3543 #undef __FUNCT__
3544 #define __FUNCT__ "MatGetLocalMat"
3545 /*@C
3546      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of A
3547 
3548     Not Collective
3549 
3550    Input Parameters:
3551 +    A - the matrix
3552 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3553 
3554    Output Parameter:
3555 .    A_loc - the local sequential matrix generated
3556 
3557     Level: developer
3558 
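   A sketch of typical use (MAT_INITIAL_MATRIX on the first call, then
   MAT_REUSE_MATRIX once the nonzero pattern of A is fixed):
.vb
   Mat A_loc;
   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
   /* ... the values of A change, its nonzero pattern does not ... */
   ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve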
3559 @*/
3560 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3561 {
3562   PetscErrorCode  ierr;
3563   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3564   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3565   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3566   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3567   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3568   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3569 
3570   PetscFunctionBegin;
3571   if (!logkey_getlocalmat) {
3572     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3573   }
3574   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3575   if (scall == MAT_INITIAL_MATRIX){
3576     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3577     ci[0] = 0;
3578     for (i=0; i<am; i++){
3579       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3580     }
3581     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3582     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3583     k = 0;
3584     for (i=0; i<am; i++) {
3585       ncols_o = bi[i+1] - bi[i];
3586       ncols_d = ai[i+1] - ai[i];
3587       /* off-diagonal portion of A */
3588       for (jo=0; jo<ncols_o; jo++) {
3589         col = cmap[*bj];
3590         if (col >= cstart) break;
3591         cj[k]   = col; bj++;
3592         ca[k++] = *ba++;
3593       }
3594       /* diagonal portion of A */
3595       for (j=0; j<ncols_d; j++) {
3596         cj[k]   = cstart + *aj++;
3597         ca[k++] = *aa++;
3598       }
3599       /* off-diagonal portion of A */
3600       for (j=jo; j<ncols_o; j++) {
3601         cj[k]   = cmap[*bj++];
3602         ca[k++] = *ba++;
3603       }
3604     }
3605     /* put together the new matrix */
3606     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3607     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3608     /* Since these are PETSc arrays, change flags to free them as necessary. */
3609     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3610     mat->free_a  = PETSC_TRUE;
3611     mat->free_ij = PETSC_TRUE;
3612     mat->nonew   = 0;
3613   } else if (scall == MAT_REUSE_MATRIX){
3614     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3615     ci = mat->i; cj = mat->j; ca = mat->a;
3616     for (i=0; i<am; i++) {
3617       /* off-diagonal portion of A */
3618       ncols_o = bi[i+1] - bi[i];
3619       for (jo=0; jo<ncols_o; jo++) {
3620         col = cmap[*bj];
3621         if (col >= cstart) break;
3622         *ca++ = *ba++; bj++;
3623       }
3624       /* diagonal portion of A */
3625       ncols_d = ai[i+1] - ai[i];
3626       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3627       /* off-diagonal portion of A */
3628       for (j=jo; j<ncols_o; j++) {
3629         *ca++ = *ba++; bj++;
3630       }
3631     }
3632   } else {
3633     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3634   }
3635 
3636   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3637   PetscFunctionReturn(0);
3638 }
3639 
3640 static PetscEvent logkey_getlocalmatcondensed = 0;
3641 #undef __FUNCT__
3642 #define __FUNCT__ "MatGetLocalMatCondensed"
3643 /*@C
3644      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A, keeping only the NON-ZERO columns
3645 
3646     Not Collective
3647 
3648    Input Parameters:
3649 +    A - the matrix
3650 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3651 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3652 
3653    Output Parameter:
3654 .    A_loc - the local sequential matrix generated
3655 
3656     Level: developer
3657 
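   A sketch of the common call, letting the routine build the row and column
   index sets itself:
.vb
   Mat A_loc;
   ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve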
3658 @*/
3659 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3660 {
3661   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3662   PetscErrorCode    ierr;
3663   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3664   IS                isrowa,iscola;
3665   Mat               *aloc;
3666 
3667   PetscFunctionBegin;
3668   if (!logkey_getlocalmatcondensed) {
3669     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3670   }
3671   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3672   if (!row){
3673     start = A->rmap.rstart; end = A->rmap.rend;
3674     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3675   } else {
3676     isrowa = *row;
3677   }
3678   if (!col){
3679     start = A->cmap.rstart;
3680     cmap  = a->garray;
3681     nzA   = a->A->cmap.n;
3682     nzB   = a->B->cmap.n;
3683     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3684     ncols = 0;
3685     for (i=0; i<nzB; i++) {
3686       if (cmap[i] < start) idx[ncols++] = cmap[i];
3687       else break;
3688     }
3689     imark = i;
3690     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3691     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3692     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3693     ierr = PetscFree(idx);CHKERRQ(ierr);
3694   } else {
3695     iscola = *col;
3696   }
3697   if (scall != MAT_INITIAL_MATRIX){
3698     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3699     aloc[0] = *A_loc;
3700   }
3701   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3702   *A_loc = aloc[0];
3703   ierr = PetscFree(aloc);CHKERRQ(ierr);
3704   if (!row){
3705     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3706   }
3707   if (!col){
3708     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3709   }
3710   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3711   PetscFunctionReturn(0);
3712 }
3713 
3714 static PetscEvent logkey_GetBrowsOfAcols = 0;
3715 #undef __FUNCT__
3716 #define __FUNCT__ "MatGetBrowsOfAcols"
3717 /*@C
3718     MatGetBrowsOfAcols - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns of the local part of A
3719 
3720     Collective on Mat
3721 
3722    Input Parameters:
3723 +    A,B - the matrices in mpiaij format
3724 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3725 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3726 
3727    Output Parameter:
3728 +    rowb, colb - index sets of rows and columns of B to extract
3729 .    brstart - row index of B_seq at which B's own local rows begin (the next B->rmap.n rows of B_seq are B's local rows)
3730 -    B_seq - the sequential matrix generated
3731 
3732     Level: developer
3733 
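   A sketch of a first call, letting the routine create the index sets and
   saving them for later reuse:
.vb
   IS       rowb,colb;
   PetscInt brstart;
   Mat      B_seq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
   /* ... values of B change, pattern unchanged ... */
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve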
3734 @*/
3735 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3736 {
3737   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3738   PetscErrorCode    ierr;
3739   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3740   IS                isrowb,iscolb;
3741   Mat               *bseq;
3742 
3743   PetscFunctionBegin;
3744   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3745     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3746   }
3747   if (!logkey_GetBrowsOfAcols) {
3748     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3749   }
3750   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3751 
3752   if (scall == MAT_INITIAL_MATRIX){
3753     start = A->cmap.rstart;
3754     cmap  = a->garray;
3755     nzA   = a->A->cmap.n;
3756     nzB   = a->B->cmap.n;
3757     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3758     ncols = 0;
3759     for (i=0; i<nzB; i++) {  /* row < local row index */
3760       if (cmap[i] < start) idx[ncols++] = cmap[i];
3761       else break;
3762     }
3763     imark = i;
3764     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3765     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3766     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3767     ierr = PetscFree(idx);CHKERRQ(ierr);
3768     *brstart = imark;
3769     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3770   } else {
3771     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3772     isrowb = *rowb; iscolb = *colb;
3773     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3774     bseq[0] = *B_seq;
3775   }
3776   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3777   *B_seq = bseq[0];
3778   ierr = PetscFree(bseq);CHKERRQ(ierr);
3779   if (!rowb){
3780     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3781   } else {
3782     *rowb = isrowb;
3783   }
3784   if (!colb){
3785     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3786   } else {
3787     *colb = iscolb;
3788   }
3789   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3790   PetscFunctionReturn(0);
3791 }
3792 
3793 static PetscEvent logkey_GetBrowsOfAocols = 0;
3794 #undef __FUNCT__
3795 #define __FUNCT__ "MatGetBrowsOfAoCols"
3796 /*@C
3797     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix containing the rows of B that correspond to the
3798     nonzero columns of the OFF-DIAGONAL portion of the local part of A
3799 
3800     Collective on Mat
3801 
3802    Input Parameters:
3803 +    A,B - the matrices in mpiaij format
3804 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3805 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3806 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3807 
3808    Output Parameter:
3809 .    B_oth - the sequential matrix generated
3810 
3811     Level: developer
3812 
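   A sketch of a first call, asking the routine to return its communication
   buffers so that later calls can reuse them:
.vb
   PetscInt    *startsj = PETSC_NULL;
   PetscScalar *bufa    = PETSC_NULL;
   Mat         B_oth;
   ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve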
3813 @*/
3814 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3815 {
3816   VecScatter_MPI_General *gen_to,*gen_from;
3817   PetscErrorCode         ierr;
3818   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3819   Mat_SeqAIJ             *b_oth;
3820   VecScatter             ctx=a->Mvctx;
3821   MPI_Comm               comm=ctx->comm;
3822   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3823   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3824   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3825   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3826   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
3827   MPI_Status             *sstatus,rstatus;
3828   PetscInt               *cols,sbs,rbs;
3829   PetscScalar            *vals;
3830 
3831   PetscFunctionBegin;
3832   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3833     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3834   }
3835   if (!logkey_GetBrowsOfAocols) {
3836     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3837   }
3838   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3839   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3840 
3841   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3842   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3843   rvalues  = gen_from->values; /* holds the length of receiving row */
3844   svalues  = gen_to->values;   /* holds the length of sending row */
3845   nrecvs   = gen_from->n;
3846   nsends   = gen_to->n;
3847 
3848   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
3849   srow     = gen_to->indices;   /* local row index to be sent */
3850   sstarts  = gen_to->starts;
3851   sprocs   = gen_to->procs;
3852   sstatus  = gen_to->sstatus;
3853   sbs      = gen_to->bs;
3854   rstarts  = gen_from->starts;
3855   rprocs   = gen_from->procs;
3856   rbs      = gen_from->bs;
3857 
3858   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3859   if (scall == MAT_INITIAL_MATRIX){
3860     /* i-array */
3861     /*---------*/
3862     /*  post receives */
3863     for (i=0; i<nrecvs; i++){
3864       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
3865       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
3866       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3867     }
3868 
3869     /* pack the outgoing message */
3870     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3871     rstartsj = sstartsj + nsends +1;
3872     sstartsj[0] = 0;  rstartsj[0] = 0;
3873     len = 0; /* total length of j or a array to be sent */
3874     k = 0;
3875     for (i=0; i<nsends; i++){
3876       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
3877       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
3878       for (j=0; j<nrows; j++) {
3879         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3880         for (l=0; l<sbs; l++){
3881           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3882           rowlen[j*sbs+l] = ncols;
3883           len += ncols;
3884           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3885         }
3886         k++;
3887       }
3888       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3889       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3890     }
3891     /* recvs and sends of i-array are completed */
3892     i = nrecvs;
3893     while (i--) {
3894       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3895     }
3896     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3897 
3898     /* allocate buffers for sending j and a arrays */
3899     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3900     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3901 
3902     /* create i-array of B_oth */
3903     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3904     b_othi[0] = 0;
3905     len = 0; /* total length of j or a array to be received */
3906     k = 0;
3907     for (i=0; i<nrecvs; i++){
3908       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; /* same offset used when the receive was posted */
3909       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
3910       for (j=0; j<nrows; j++) {
3911         b_othi[k+1] = b_othi[k] + rowlen[j];
3912         len += rowlen[j]; k++;
3913       }
3914       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3915     }
3916 
3917     /* allocate space for the j and a arrays of B_oth */
3918     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3919     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3920 
3921     /* j-array */
3922     /*---------*/
3923     /*  post receives of j-array */
3924     for (i=0; i<nrecvs; i++){
3925       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3926       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3927     }
3928 
3929     /* pack the outgoing message j-array */
3930     k = 0;
3931     for (i=0; i<nsends; i++){
3932       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
3933       bufJ = bufj+sstartsj[i];
3934       for (j=0; j<nrows; j++) {
3935         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3936         for (ll=0; ll<sbs; ll++){
3937           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3938           for (l=0; l<ncols; l++){
3939             *bufJ++ = cols[l];
3940           }
3941           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3942         }
3943       }
3944       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3945     }
3946 
3947     /* recvs and sends of j-array are completed */
3948     i = nrecvs;
3949     while (i--) {
3950       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3951     }
3952     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3953   } else if (scall == MAT_REUSE_MATRIX){
3954     sstartsj = *startsj;
3955     rstartsj = sstartsj + nsends +1;
3956     bufa     = *bufa_ptr;
3957     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3958     b_otha   = b_oth->a;
3959   } else {
3960     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3961   }
3962 
3963   /* a-array */
3964   /*---------*/
3965   /*  post receives of a-array */
3966   for (i=0; i<nrecvs; i++){
3967     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3968     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3969   }
3970 
3971   /* pack the outgoing message a-array */
3972   k = 0;
3973   for (i=0; i<nsends; i++){
3974     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
3975     bufA = bufa+sstartsj[i];
3976     for (j=0; j<nrows; j++) {
3977       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3978       for (ll=0; ll<sbs; ll++){
3979         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3980         for (l=0; l<ncols; l++){
3981           *bufA++ = vals[l];
3982         }
3983         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3984       }
3985     }
3986     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3987   }
3988   /* recvs and sends of a-array are completed */
3989   i = nrecvs;
3990   while (i--) {
3991     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3992   }
3993   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3994   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
3995 
3996   if (scall == MAT_INITIAL_MATRIX){
3997     /* put together the new matrix */
3998     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3999 
4000     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4001     /* Since these are PETSc arrays, change flags to free them as necessary. */
4002     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4003     b_oth->free_a  = PETSC_TRUE;
4004     b_oth->free_ij = PETSC_TRUE;
4005     b_oth->nonew   = 0;
4006 
4007     ierr = PetscFree(bufj);CHKERRQ(ierr);
4008     if (!startsj || !bufa_ptr){
4009       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4010       ierr = PetscFree(bufa);CHKERRQ(ierr); /* nothing to hand back to the caller, so free the send buffer */
4011     } else {
4012       *startsj  = sstartsj;
4013       *bufa_ptr = bufa;
4014     }
4015   }
4016   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4017 
4018   PetscFunctionReturn(0);
4019 }
4020 
4021 #undef __FUNCT__
4022 #define __FUNCT__ "MatGetCommunicationStructs"
4023 /*@C
4024   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4025 
4026   Not Collective
4027 
4028   Input Parameters:
4029 . A - The matrix in mpiaij format
4030 
4031   Output Parameter:
4032 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4033 . colmap - A map from global column index to local index into lvec
4034 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4035 
4036   Level: developer
4037 
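  A sketch of typical access (note that the type of colmap depends on whether
  PETSc was configured with PETSC_USE_CTABLE):
.vb
  Vec        lvec;
  VecScatter scat;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
.ve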
4038 @*/
4039 #if defined (PETSC_USE_CTABLE)
4040 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4041 #else
4042 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4043 #endif
4044 {
4045   Mat_MPIAIJ *a;
4046 
4047   PetscFunctionBegin;
4048   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4049   PetscValidPointer(lvec, 2);
4050   PetscValidPointer(colmap, 3);
4051   PetscValidPointer(multScatter, 4);
4052   a = (Mat_MPIAIJ *) A->data;
4053   if (lvec) *lvec = a->lvec;
4054   if (colmap) *colmap = a->colmap;
4055   if (multScatter) *multScatter = a->Mvctx;
4056   PetscFunctionReturn(0);
4057 }
4058 
4059 EXTERN_C_BEGIN
4060 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4061 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4062 EXTERN_C_END
4063 
4064 /*MC
4065    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4066 
4067    Options Database Keys:
4068 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4069 
4070   Level: beginner
4071 
4072 .seealso: MatCreateMPIAIJ
4073 M*/
4074 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->rmap.bs      = 1;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off-process array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix-vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                      MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                      MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
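
/*
   A usage sketch (not compiled here) showing the preallocation routine this
   constructor composes onto the object; comm, m, n, M, N, dnz, and onz are
   illustrative names, not defined in this file:

      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(B,dnz,PETSC_NULL,onz,PETSC_NULL);CHKERRQ(ierr);
*/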

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices for "diagonal" portion of matrix
.  a - matrix values for "diagonal" portion of matrix
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices for "off-diagonal" portion of matrix
-  oa - matrix values for "off-diagonal" portion of matrix

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, a, oi, oj, and oa arrays are NOT copied by this routine into the internal format used by PETSc;
       they must therefore remain valid for the lifetime of the matrix.

       The i, j, oi, and oj indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

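   Example usage (a minimal sketch: a 2x2 matrix on two processes, one row per
   process, with one entry in each of the "diagonal" and "off-diagonal" blocks;
   the values are illustrative):
.vb
      /* on rank 0 of two processes, row 0 of the matrix [2 -1; -1 2] */
      PetscInt    i[2]  = {0,1}, j[1]  = {0};   /* j is local to the "diagonal" block */
      PetscInt    oi[2] = {0,1}, oj[1] = {1};   /* oj holds global column indices     */
      PetscScalar a[1]  = {2.0}, oa[1] = {-1.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      /* rank 1 passes the same i, oi, a, and oa, with j[0] = 0 and oj[0] = 0 */
.ve
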
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                                                PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
  ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr);

  /* the "off-diagonal" block B is created with the full global column width;
     its columns are compacted when the MPI matrix is assembled below */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version of MatSetValues() for MPIAIJ matrices, for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Redefine these macros so they can be used in a void function: there is no
   PetscErrorCode return value to propagate, so errors abort instead */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* the colmap stores local index + 1 so that 0 means "column not present" */
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 note that ba must be refreshed before ap2 is recomputed, since
                 DisAssemble_MPIAIJ() replaces the B matrix and its value array */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END
