xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fd705b320d8d44969be9ca25a36dbdd35fbe8e12)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash-table cost; without it, it is not scalable (each
  processor has an order-N integer array, but lookups are fast).
*/
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
#define CHUNKSIZE   15
/*
   MatSetValues_SeqAIJ_A_Private(row,col,value,addv):
   inlined insert/add of a single entry into the "diagonal" block A.
   Expects the caller to have set up rp1/ap1 (column indices / values of the
   row), nrow1/rmax1 (current/allocated length), low1/high1/lastcol1 (search
   window state, reused across calls for monotone column sequences), plus
   a, aa, ai, aj, aimax, ailen, nonew, ignorezeroentries, am and scratch
   variables t,_i,ii,N.  Binary-search narrows the window, then a linear scan
   finds the exact slot; if the entry is absent and insertion is allowed, the
   row is grown (reallocating via MatSeqXAIJReallocateAIJ when full) and the
   tail of the row is shifted up one slot.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
      if (nonew == 1) goto a_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
70 
71 
/*
   MatSetValues_SeqAIJ_B_Private(row,col,value,addv):
   same algorithm as MatSetValues_SeqAIJ_A_Private above, but operating on the
   off-diagonal block B via the parallel set of caller-provided variables
   (rp2/ap2, nrow2/rmax2, low2/high2/lastcol2, b, ba, bi, bj, bimax, bilen, bm).
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
       for (_i=low2; _i<high2; _i++) { \
        if (rp2[_i] > col) break; \
        if (rp2[_i] == col) { \
          if (addv == ADD_VALUES) ap2[_i] += value;   \
          else                    ap2[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
      if (nonew == 1) goto b_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
      N = nrow2++ - 1; b->nz++; high2++;\
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp2[ii+1] = rp2[ii]; \
        ap2[ii+1] = ap2[ii]; \
      } \
      rp2[_i] = col;  \
      ap2[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow2; \
}
104 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Copies a full row of values directly into the preallocated storage of the
   local blocks A (diagonal) and B (off-diagonal).

   Assumes v[] holds the entire local row's values ordered by increasing
   global column number -- TODO confirm against callers.  The row is split as
   [left-of-diagonal part of B | diagonal block A | right-of-diagonal part of B].
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part: count B entries whose
     global column (via garray) falls before this process's ownership start */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;   /* convert global row number to local */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = a->ignorezeroentries;
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values into v (row-major).  Only locally
   owned rows are supported; off-process rows raise an error.  Columns are
   fetched from the diagonal block A when locally owned, otherwise looked up
   in the off-diagonal block B via colmap; columns absent from B yield 0.0.
   Negative row/column indices are skipped (corresponding v entries are left
   untouched).
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* locally owned column: read from the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-process column: translate global column -> local B column */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 (not in table) or a garray mismatch means the column is
             not stored in B: the entry is a structural zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are either in INSERTMODE or ADDMODE */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash (applying received off-process
   values via MatSetValues_MPIAIJ), assembles the diagonal block, performs
   collective disassembly if any rank disassembled, and finally assembles
   the off-diagonal block.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive stashed values message-by-message until exhausted */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is 0 iff some rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first assembly: build the column map, ghost vector and scatter */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = ((PetscObject)A)->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal we use separate
502      code so that MatSetValues() is not called for each diagonal allocating
503      new memory, thus calling lots of mallocs and slowing things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added in yy until the next line, */
594     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
600     /* values actually were received in the Begin() but we need to call this nop */
601     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Tests whether Bmat equals Amat^T to within tol.  First a cheap local test
   on the diagonal blocks; if that passes and there is more than one rank,
   the off-diagonal parts are compared via submatrix extraction.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  /* NOTE(review): notme is allocated with N-last+first entries but the second
     fill loop runs i up to M (global rows); if M > N this would write past the
     allocation.  Presumably only square (M == N) matrices reach here -- confirm. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Destroys the MPIAIJ data: the two local blocks, the stash, the column
   map, the ghost vector and scatter, and all auxiliary arrays; finally
   clears the type name and the composed function slots.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  /* detach the type-specific methods composed at creation time */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* )th processor needs space a largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   Common viewer implementation for MPIAIJ matrices, shared by the ASCII,
   draw, binary and socket viewers.  The info-only ASCII formats and the
   parallel binary case are handled directly and return early; every other
   case falls through to the generic path at the bottom which, for more
   than one process, assembles a copy of the entire matrix on process 0
   and views that copy through the singleton viewer.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process summary: local sizes, nonzeros, memory, I-node usage */
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* NOTE(review): inodes is a PetscTruth used only as a yes/no flag, but the
         argument slot is a PetscInt** (the I-node sizes array) -- confirm the
         cast is intentional and only NULL/non-NULL is relied upon */
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* brief summary: I-node usage of the process-0 diagonal block only */
      PetscInt   inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to report: no parallel factorization data is stored here */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocessor: the diagonal block IS the whole matrix */
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part: temporarily shift its column indices to global
       numbering for MatSetValues(), then shift them back afterwards */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part: translate its compressed columns through garray */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    /* ct keeps the original base pointer; cols has been advanced past the end */
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatView_MPIAIJ"
996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997 {
998   PetscErrorCode ierr;
999   PetscTruth     iascii,isdraw,issocket,isbinary;
1000 
1001   PetscFunctionBegin;
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1003   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1005   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1006   if (iascii || isdraw || isbinary || issocket) {
1007     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1008   } else {
1009     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   Local SOR/relaxation for MPIAIJ matrices.  Each outer iteration
   scatters the current solution into the ghost vector lvec, forms
   bb1 = bb - B*x (folding the off-processor coupling into the right-hand
   side), and runs the requested sweep of the sequential relaxation on the
   diagonal block A.  Only the SOR_LOCAL_* sweep variants are supported;
   true parallel SOR is not.  If SOR_ZERO_INITIAL_GUESS is set, the first
   iteration skips the scatter since x is zero and B*x contributes nothing.
*/
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;   /* modified right-hand side bb - B*x */

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* NOTE(review): here lits is passed for both the its and lits slots of
         A->ops->relax, while the forward/backward cases below pass PETSC_NULL
         (i.e. 0) for the lits slot -- confirm the asymmetry is intended */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatPermute_MPIAIJ"
1084 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1085 {
1086   MPI_Comm       comm,pcomm;
1087   PetscInt       first,local_size,nrows,*rows;
1088   int            ntids;
1089   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1090   PetscErrorCode ierr;
1091 
1092   PetscFunctionBegin;
1093   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1094   /* make a collective version of 'rowp' */
1095   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1096   if (pcomm==comm) {
1097     crowp = rowp;
1098   } else {
1099     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1100     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1101     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1102     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1103   }
1104   /* collect the global row permutation and invert it */
1105   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1106   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1107   if (pcomm!=comm) {
1108     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1109   }
1110   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1111   /* get the local target indices */
1112   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1113   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1114   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1115   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1116   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1117   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1118   /* the column permutation is so much easier;
1119      make a local version of 'colp' and invert it */
1120   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1121   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1122   if (ntids==1) {
1123     lcolp = colp;
1124   } else {
1125     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1126     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1127     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1128   }
1129   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1130   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1131   if (ntids>1) {
1132     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1133     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1134   }
1135   /* now we just get the submatrix */
1136   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1137   /* clean up */
1138   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1139   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1145 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1146 {
1147   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1148   Mat            A = mat->A,B = mat->B;
1149   PetscErrorCode ierr;
1150   PetscReal      isend[5],irecv[5];
1151 
1152   PetscFunctionBegin;
1153   info->block_size     = 1.0;
1154   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1155   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1156   isend[3] = info->memory;  isend[4] = info->mallocs;
1157   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1158   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1159   isend[3] += info->memory;  isend[4] += info->mallocs;
1160   if (flag == MAT_LOCAL) {
1161     info->nz_used      = isend[0];
1162     info->nz_allocated = isend[1];
1163     info->nz_unneeded  = isend[2];
1164     info->memory       = isend[3];
1165     info->mallocs      = isend[4];
1166   } else if (flag == MAT_GLOBAL_MAX) {
1167     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1168     info->nz_used      = irecv[0];
1169     info->nz_allocated = irecv[1];
1170     info->nz_unneeded  = irecv[2];
1171     info->memory       = irecv[3];
1172     info->mallocs      = irecv[4];
1173   } else if (flag == MAT_GLOBAL_SUM) {
1174     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1175     info->nz_used      = irecv[0];
1176     info->nz_allocated = irecv[1];
1177     info->nz_unneeded  = irecv[2];
1178     info->memory       = irecv[3];
1179     info->mallocs      = irecv[4];
1180   }
1181   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1182   info->fill_ratio_needed = 0;
1183   info->factor_mallocs    = 0;
1184   info->rows_global       = (double)matin->rmap.N;
1185   info->columns_global    = (double)matin->cmap.N;
1186   info->rows_local        = (double)matin->rmap.n;
1187   info->columns_local     = (double)matin->cmap.N;
1188 
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatSetOption_MPIAIJ"
1194 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1195 {
1196   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200   switch (op) {
1201   case MAT_NEW_NONZERO_LOCATIONS:
1202   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1203   case MAT_KEEP_ZEROED_ROWS:
1204   case MAT_NEW_NONZERO_LOCATION_ERR:
1205   case MAT_USE_INODES:
1206   case MAT_IGNORE_ZERO_ENTRIES:
1207     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1208     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1209     break;
1210   case MAT_ROW_ORIENTED:
1211     a->roworiented = flg;
1212     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1213     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1214     break;
1215   case MAT_NEW_DIAGONALS:
1216     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1217     break;
1218   case MAT_IGNORE_OFF_PROC_ENTRIES:
1219     a->donotstash = PETSC_TRUE;
1220     break;
1221   case MAT_SYMMETRIC:
1222     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1223     break;
1224   case MAT_STRUCTURALLY_SYMMETRIC:
1225   case MAT_HERMITIAN:
1226   case MAT_SYMMETRY_ETERNAL:
1227     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1228     break;
1229   default:
1230     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   Returns one locally owned row of the parallel matrix, with global
   column indices in increasing order.  The row is assembled by merging
   the row of the off-diagonal block B (compressed columns mapped to
   global indices through garray) around the row of the diagonal block A:
   B entries whose global column precedes the diagonal-block range come
   first, then the A entries shifted by cstart, then the remaining B
   entries.  Scratch space sized to the longest local row is allocated
   lazily on first use and cached in mat->rowvalues / mat->rowindices.
   Only one row may be "gotten" at a time; MatRestoreRow_MPIAIJ() resets
   the getrowactive flag.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt     max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, indices appended after them */
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark: number of B entries whose global column is below cstart;
         computed in whichever of the two loops below runs first */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already known from the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* hand the block rows back immediately; the merged copies live in the cache */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1318 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1319 {
1320   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1321 
1322   PetscFunctionBegin;
1323   if (!aij->getrowactive) {
1324     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1325   }
1326   aij->getrowactive = PETSC_FALSE;
1327   PetscFunctionReturn(0);
1328 }
1329 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   Computes a norm of a parallel AIJ matrix from its two local blocks:
     NORM_FROBENIUS - sum of squared entries over A and B, MPI_SUM reduced,
                      then square-rooted;
     NORM_1         - per-column absolute sums accumulated into a global-
                      length array (A columns shifted by cstart, B columns
                      mapped through garray), MPI_SUM reduced, then maxed;
     NORM_INFINITY  - per-row absolute sums over A and B, MPI_MAX reduced.
   NORM_2 is not supported.  On one process the computation is delegated
   to the sequential block A.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* tmp holds this process's contribution to every global column sum;
         note this is O(global columns) storage per process */
      PetscReal *tmp,*tmp2;
      PetscInt    *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap.n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1406 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   Forms the transpose of a parallel AIJ matrix by inserting each local
   row of A and B as a column of the new matrix B (rows become global
   column indices via MatSetValues with swapped row/column roles).  If
   matout is NULL the transpose replaces A in place, which is only allowed
   for square matrices.  d_nnz counts entries per local column of A for
   preallocation; the off-diagonal count is approximated by the same array
   to avoid communication.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
  PetscInt       cstart=A->cmap.rstart,ncol;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
  ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  /* one allocation: d_nnz counts followed by scratch space for the B columns */
  ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  cols = d_nnz + na + 1; /* work space to be used by B part */
  ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ai[ma]; i++){
    d_nnz[aj[i]] ++;
    aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);

  /* copy over the A part: each local row becomes a column of B */
  array = Aloc->a;
  row = A->rmap.rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  /* Aloc->j was shifted to global numbering above; undo that here */
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part: compressed columns mapped to global via garray */
  array = Bloc->a;
  row = A->rmap.rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols += ncol;
  }
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: B's guts replace A's, and B's shell is destroyed */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1472 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1473 {
1474   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1475   Mat            a = aij->A,b = aij->B;
1476   PetscErrorCode ierr;
1477   PetscInt       s1,s2,s3;
1478 
1479   PetscFunctionBegin;
1480   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1481   if (rr) {
1482     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1483     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1484     /* Overlap communication with computation. */
1485     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486   }
1487   if (ll) {
1488     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1489     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1490     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1491   }
1492   /* scale  the diagonal block */
1493   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1494 
1495   if (rr) {
1496     /* Do a scatter end and then right scale the off-diagonal block */
1497     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1499   }
1500 
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1506 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1507 {
1508   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1509   PetscErrorCode ierr;
1510 
1511   PetscFunctionBegin;
1512   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1513   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1514   PetscFunctionReturn(0);
1515 }
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1518 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1519 {
1520   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatEqual_MPIAIJ"
1530 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1531 {
1532   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1533   Mat            a,b,c,d;
1534   PetscTruth     flg;
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   a = matA->A; b = matA->B;
1539   c = matB->A; d = matB->B;
1540 
1541   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1542   if (flg) {
1543     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1544   }
1545   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatCopy_MPIAIJ"
1551 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1552 {
1553   PetscErrorCode ierr;
1554   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1555   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1556 
1557   PetscFunctionBegin;
1558   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1559   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1560     /* because of the column compression in the off-processor part of the matrix a->B,
1561        the number of columns in a->B and b->B may be different, hence we cannot call
1562        the MatCopy() directly on the two parts. If need be, we can provide a more
1563        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1564        then copying the submatrices */
1565     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1566   } else {
1567     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1568     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1569   }
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1575 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1576 {
1577   PetscErrorCode ierr;
1578 
1579   PetscFunctionBegin;
1580   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 
#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   Computes Y = a*X + Y for MPIAIJ matrices.

   Three cases on the structure flag:
   - SAME_NONZERO_PATTERN:   the nonzero value arrays of the two matrices line
                             up entry-for-entry, so one BLAS axpy per block
                             suffices.
   - SUBSET_NONZERO_PATTERN: the diagonal block is delegated to
                             MatAXPY_SeqAIJ(); for the off-diagonal block a
                             mapping xtoy (position of each of X's nonzeros
                             inside Y's arrays) is built via
                             MatAXPYGetxtoy_Private() and cached on Y for
                             reuse, since the two blocks compress their
                             columns (garray) differently.
   - otherwise:              fall back to the generic MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal block: y->a += alpha * x->a over all nz entries */
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    /* off-diagonal block, same treatment */
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* drop a cached mapping that was built against a different X matrix */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      /* remember which X the mapping belongs to; reference keeps it alive */
      y->XtoY = xx->B;
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    /* scatter-add X's off-diagonal values into Y through the cached mapping */
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1626 
1627 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1628 
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatConjugate_MPIAIJ"
1631 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1632 {
1633 #if defined(PETSC_USE_COMPLEX)
1634   PetscErrorCode ierr;
1635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1636 
1637   PetscFunctionBegin;
1638   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1639   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1640 #else
1641   PetscFunctionBegin;
1642 #endif
1643   PetscFunctionReturn(0);
1644 }
1645 
1646 #undef __FUNCT__
1647 #define __FUNCT__ "MatRealPart_MPIAIJ"
1648 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1649 {
1650   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1651   PetscErrorCode ierr;
1652 
1653   PetscFunctionBegin;
1654   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1655   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 #undef __FUNCT__
1660 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1661 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1662 {
1663   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1664   PetscErrorCode ierr;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1668   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 #ifdef PETSC_HAVE_PBGL
1673 
1674 #include <boost/parallel/mpi/bsp_process_group.hpp>
1675 #include <boost/graph/distributed/ilu_default_graph.hpp>
1676 #include <boost/graph/distributed/ilu_0_block.hpp>
1677 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1678 #include <boost/graph/distributed/petsc/interface.hpp>
1679 #include <boost/multi_array.hpp>
1680 #include <boost/parallel/distributed_property_map.hpp>
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1684 /*
1685   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1686 */
1687 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1688 {
1689   namespace petsc = boost::distributed::petsc;
1690 
1691   namespace graph_dist = boost::graph::distributed;
1692   using boost::graph::distributed::ilu_default::process_group_type;
1693   using boost::graph::ilu_permuted;
1694 
1695   PetscTruth      row_identity, col_identity;
1696   PetscContainer  c;
1697   PetscInt        m, n, M, N;
1698   PetscErrorCode  ierr;
1699 
1700   PetscFunctionBegin;
1701   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1702   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1703   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1704   if (!row_identity || !col_identity) {
1705     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1706   }
1707 
1708   process_group_type pg;
1709   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1710   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1711   lgraph_type&   level_graph = *lgraph_p;
1712   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1713 
1714   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1715   ilu_permuted(level_graph);
1716 
1717   /* put together the new matrix */
1718   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1719   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1720   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1721   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1722   ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1723   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1725   (*fact)->factor = FACTOR_LU;
1726 
1727   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1728   ierr = PetscContainerSetPointer(c, lgraph_p);
1729   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1730   PetscFunctionReturn(0);
1731 }
1732 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/*
  Intentionally empty: the parallel ILU factorization is carried out entirely
  in MatILUFactorSymbolic_MPIAIJ() (via ilu_permuted()), so there is no
  separate numeric phase.
*/
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
1740 
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatSolve_MPIAIJ"
1743 /*
1744   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1745 */
1746 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1747 {
1748   namespace graph_dist = boost::graph::distributed;
1749 
1750   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1751   lgraph_type*   lgraph_p;
1752   PetscContainer c;
1753   PetscErrorCode ierr;
1754 
1755   PetscFunctionBegin;
1756   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1757   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1758   ierr = VecCopy(b, x); CHKERRQ(ierr);
1759 
1760   PetscScalar* array_x;
1761   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1762   PetscInt sx;
1763   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1764 
1765   PetscScalar* array_b;
1766   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1767   PetscInt sb;
1768   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1769 
1770   lgraph_type&   level_graph = *lgraph_p;
1771   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1772 
1773   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1774   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1775                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1776 
1777   typedef boost::iterator_property_map<array_ref_type::iterator,
1778                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1779   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1780                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1781 
1782   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1783 
1784   PetscFunctionReturn(0);
1785 }
1786 #endif
1787 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;  /* local nonzero count; number of messages sent/received */
  PetscMPIInt    *send_rank;             /* destination ranks; the receive ranks live at send_rank+size */
  PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j; /* send buffer of nz counts (recv counts at sbuf_nz+nsends); send/recv buffers of row pointers + column indices */
  PetscScalar    *sbuf_a,**rbuf_a;       /* send buffer / per-message receive buffers of matrix values */
  PetscErrorCode (*MatDestroy)(Mat);     /* matrix's original destroy routine, restored by MatDestroy_MatRedundant() */
} Mat_Redundant;
1795 
1796 #undef __FUNCT__
1797 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1798 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1799 {
1800   PetscErrorCode       ierr;
1801   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1802   PetscInt             i;
1803 
1804   PetscFunctionBegin;
1805   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1806   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1807   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1808   for (i=0; i<redund->nrecvs; i++){
1809     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1810     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1811   }
1812   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1813   ierr = PetscFree(redund);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatDestroy_MatRedundant"
1819 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1820 {
1821   PetscErrorCode  ierr;
1822   PetscContainer  container;
1823   Mat_Redundant   *redund=PETSC_NULL;
1824 
1825   PetscFunctionBegin;
1826   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1827   if (container) {
1828     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1829   } else {
1830     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1831   }
1832   A->ops->destroy = redund->MatDestroy;
1833   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1834   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1835   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1836   PetscFunctionReturn(0);
1837 }
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1841 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1842 {
1843   PetscMPIInt    rank,size;
1844   MPI_Comm       comm=((PetscObject)mat)->comm;
1845   PetscErrorCode ierr;
1846   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1847   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1848   PetscInt       *rowrange=mat->rmap.range;
1849   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1850   Mat            A=aij->A,B=aij->B,C=*matredundant;
1851   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1852   PetscScalar    *sbuf_a;
1853   PetscInt       nzlocal=a->nz+b->nz;
1854   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1855   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1856   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1857   PetscScalar    *vals,*aworkA,*aworkB;
1858   PetscMPIInt    tag1,tag2,tag3,imdex;
1859   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1860                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1861   MPI_Status     recv_status,*send_status;
1862   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1863   PetscInt       **rbuf_j=PETSC_NULL;
1864   PetscScalar    **rbuf_a=PETSC_NULL;
1865   Mat_Redundant  *redund=PETSC_NULL;
1866   PetscContainer container;
1867 
1868   PetscFunctionBegin;
1869   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1870   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1871 
1872   if (reuse == MAT_REUSE_MATRIX) {
1873     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1874     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1875     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1876     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1877     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1878     if (container) {
1879       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1880     } else {
1881       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1882     }
1883     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1884 
1885     nsends    = redund->nsends;
1886     nrecvs    = redund->nrecvs;
1887     send_rank = redund->send_rank; recv_rank = send_rank + size;
1888     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1889     sbuf_j    = redund->sbuf_j;
1890     sbuf_a    = redund->sbuf_a;
1891     rbuf_j    = redund->rbuf_j;
1892     rbuf_a    = redund->rbuf_a;
1893   }
1894 
1895   if (reuse == MAT_INITIAL_MATRIX){
1896     PetscMPIInt  subrank,subsize;
1897     PetscInt     nleftover,np_subcomm;
1898     /* get the destination processors' id send_rank, nsends and nrecvs */
1899     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1900     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1901     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1902     recv_rank = send_rank + size;
1903     np_subcomm = size/nsubcomm;
1904     nleftover  = size - nsubcomm*np_subcomm;
1905     nsends = 0; nrecvs = 0;
1906     for (i=0; i<size; i++){ /* i=rank*/
1907       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1908         send_rank[nsends] = i; nsends++;
1909         recv_rank[nrecvs++] = i;
1910       }
1911     }
1912     if (rank >= size - nleftover){/* this proc is a leftover processor */
1913       i = size-nleftover-1;
1914       j = 0;
1915       while (j < nsubcomm - nleftover){
1916         send_rank[nsends++] = i;
1917         i--; j++;
1918       }
1919     }
1920 
1921     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1922       for (i=0; i<nleftover; i++){
1923         recv_rank[nrecvs++] = size-nleftover+i;
1924       }
1925     }
1926 
1927     /* allocate sbuf_j, sbuf_a */
1928     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1929     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1930     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1931   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1932 
1933   /* copy mat's local entries into the buffers */
1934   if (reuse == MAT_INITIAL_MATRIX){
1935     rownz_max = 0;
1936     rptr = sbuf_j;
1937     cols = sbuf_j + rend-rstart + 1;
1938     vals = sbuf_a;
1939     rptr[0] = 0;
1940     for (i=0; i<rend-rstart; i++){
1941       row = i + rstart;
1942       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1943       ncols  = nzA + nzB;
1944       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1945       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1946       /* load the column indices for this row into cols */
1947       lwrite = 0;
1948       for (l=0; l<nzB; l++) {
1949         if ((ctmp = bmap[cworkB[l]]) < cstart){
1950           vals[lwrite]   = aworkB[l];
1951           cols[lwrite++] = ctmp;
1952         }
1953       }
1954       for (l=0; l<nzA; l++){
1955         vals[lwrite]   = aworkA[l];
1956         cols[lwrite++] = cstart + cworkA[l];
1957       }
1958       for (l=0; l<nzB; l++) {
1959         if ((ctmp = bmap[cworkB[l]]) >= cend){
1960           vals[lwrite]   = aworkB[l];
1961           cols[lwrite++] = ctmp;
1962         }
1963       }
1964       vals += ncols;
1965       cols += ncols;
1966       rptr[i+1] = rptr[i] + ncols;
1967       if (rownz_max < ncols) rownz_max = ncols;
1968     }
1969     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
1970   } else { /* only copy matrix values into sbuf_a */
1971     rptr = sbuf_j;
1972     vals = sbuf_a;
1973     rptr[0] = 0;
1974     for (i=0; i<rend-rstart; i++){
1975       row = i + rstart;
1976       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1977       ncols  = nzA + nzB;
1978       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1979       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1980       lwrite = 0;
1981       for (l=0; l<nzB; l++) {
1982         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1983       }
1984       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1985       for (l=0; l<nzB; l++) {
1986         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1987       }
1988       vals += ncols;
1989       rptr[i+1] = rptr[i] + ncols;
1990     }
1991   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1992 
1993   /* send nzlocal to others, and recv other's nzlocal */
1994   /*--------------------------------------------------*/
1995   if (reuse == MAT_INITIAL_MATRIX){
1996     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
1997     s_waits2 = s_waits3 + nsends;
1998     s_waits1 = s_waits2 + nsends;
1999     r_waits1 = s_waits1 + nsends;
2000     r_waits2 = r_waits1 + nrecvs;
2001     r_waits3 = r_waits2 + nrecvs;
2002   } else {
2003     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2004     r_waits3 = s_waits3 + nsends;
2005   }
2006 
2007   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2008   if (reuse == MAT_INITIAL_MATRIX){
2009     /* get new tags to keep the communication clean */
2010     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2011     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2012     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2013     rbuf_nz = sbuf_nz + nsends;
2014 
2015     /* post receives of other's nzlocal */
2016     for (i=0; i<nrecvs; i++){
2017       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2018     }
2019     /* send nzlocal to others */
2020     for (i=0; i<nsends; i++){
2021       sbuf_nz[i] = nzlocal;
2022       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2023     }
2024     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2025     count = nrecvs;
2026     while (count) {
2027       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2028       recv_rank[imdex] = recv_status.MPI_SOURCE;
2029       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2030       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2031 
2032       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2033       rbuf_nz[imdex] += i + 2;
2034       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2035       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2036       count--;
2037     }
2038     /* wait on sends of nzlocal */
2039     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2040     /* send mat->i,j to others, and recv from other's */
2041     /*------------------------------------------------*/
2042     for (i=0; i<nsends; i++){
2043       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2044       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2045     }
2046     /* wait on receives of mat->i,j */
2047     /*------------------------------*/
2048     count = nrecvs;
2049     while (count) {
2050       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2051       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2052       count--;
2053     }
2054     /* wait on sends of mat->i,j */
2055     /*---------------------------*/
2056     if (nsends) {
2057       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2058     }
2059   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2060 
2061   /* post receives, send and receive mat->a */
2062   /*----------------------------------------*/
2063   for (imdex=0; imdex<nrecvs; imdex++) {
2064     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2065   }
2066   for (i=0; i<nsends; i++){
2067     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2068   }
2069   count = nrecvs;
2070   while (count) {
2071     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2072     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2073     count--;
2074   }
2075   if (nsends) {
2076     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2077   }
2078 
2079   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2080 
2081   /* create redundant matrix */
2082   /*-------------------------*/
2083   if (reuse == MAT_INITIAL_MATRIX){
2084     /* compute rownz_max for preallocation */
2085     for (imdex=0; imdex<nrecvs; imdex++){
2086       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2087       rptr = rbuf_j[imdex];
2088       for (i=0; i<j; i++){
2089         ncols = rptr[i+1] - rptr[i];
2090         if (rownz_max < ncols) rownz_max = ncols;
2091       }
2092     }
2093 
2094     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2095     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2096     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2097     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2098     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2099   } else {
2100     C = *matredundant;
2101   }
2102 
2103   /* insert local matrix entries */
2104   rptr = sbuf_j;
2105   cols = sbuf_j + rend-rstart + 1;
2106   vals = sbuf_a;
2107   for (i=0; i<rend-rstart; i++){
2108     row   = i + rstart;
2109     ncols = rptr[i+1] - rptr[i];
2110     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2111     vals += ncols;
2112     cols += ncols;
2113   }
2114   /* insert received matrix entries */
2115   for (imdex=0; imdex<nrecvs; imdex++){
2116     rstart = rowrange[recv_rank[imdex]];
2117     rend   = rowrange[recv_rank[imdex]+1];
2118     rptr = rbuf_j[imdex];
2119     cols = rbuf_j[imdex] + rend-rstart + 1;
2120     vals = rbuf_a[imdex];
2121     for (i=0; i<rend-rstart; i++){
2122       row   = i + rstart;
2123       ncols = rptr[i+1] - rptr[i];
2124       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2125       vals += ncols;
2126       cols += ncols;
2127     }
2128   }
2129   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2130   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2131   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2132   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2133   if (reuse == MAT_INITIAL_MATRIX){
2134     PetscContainer container;
2135     *matredundant = C;
2136     /* create a supporting struct and attach it to C for reuse */
2137     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2138     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2139     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2140     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2141     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2142 
2143     redund->nzlocal = nzlocal;
2144     redund->nsends  = nsends;
2145     redund->nrecvs  = nrecvs;
2146     redund->send_rank = send_rank;
2147     redund->sbuf_nz = sbuf_nz;
2148     redund->sbuf_j  = sbuf_j;
2149     redund->sbuf_a  = sbuf_a;
2150     redund->rbuf_j  = rbuf_j;
2151     redund->rbuf_a  = rbuf_a;
2152 
2153     redund->MatDestroy = C->ops->destroy;
2154     C->ops->destroy    = MatDestroy_MatRedundant;
2155   }
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 #undef __FUNCT__
2160 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2161 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2162 {
2163   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2164   PetscInt       n      = A->rmap.n;
2165   PetscInt       cstart = A->cmap.rstart;
2166   PetscInt      *cmap   = mat->garray;
2167   PetscInt      *diagIdx, *offdiagIdx;
2168   Vec            diagV, offdiagV;
2169   PetscScalar   *a, *diagA, *offdiagA;
2170   PetscInt       r;
2171   PetscErrorCode ierr;
2172 
2173   PetscFunctionBegin;
2174   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2175   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2176   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2177   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2178   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2179   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2180   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2181   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2182   for(r = 0; r < n; ++r) {
2183     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2184       a[r]   = diagA[r];
2185       idx[r] = cstart + diagIdx[r];
2186     } else {
2187       a[r]   = offdiagA[r];
2188       idx[r] = cmap[offdiagIdx[r]];
2189     }
2190   }
2191   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2192   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2193   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2194   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2195   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2196   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2202 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2203 {
2204   PetscErrorCode ierr;
2205 
2206   PetscFunctionBegin;
2207   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2208   PetscFunctionReturn(0);
2209 }
2210 
2211 /* -------------------------------------------------------------------*/
/*
   Function table for the MPIAIJ matrix type.  Entries are positional (the
   numeric comments give the slot index in struct _MatOps); a 0 means the
   operation is not implemented for this type.  The PBGL-guarded slots are
   only filled when the Parallel Boost Graph Library is available.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       MatLUFactorNumeric_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatILUFactorSymbolic_MPIAIJ,
#else
       0,
#endif
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ 0,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*90*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*95*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*100*/0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*105*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*110*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*115*/MatGetSeqNonzerostructure_MPIAIJ};
2344 
2345 /* ----------------------------------------------------------------------------------------*/
2346 
2347 EXTERN_C_BEGIN
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2350 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2351 {
2352   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2353   PetscErrorCode ierr;
2354 
2355   PetscFunctionBegin;
2356   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2357   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 EXTERN_C_END
2361 
2362 EXTERN_C_BEGIN
2363 #undef __FUNCT__
2364 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2365 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2366 {
2367   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2368   PetscErrorCode ierr;
2369 
2370   PetscFunctionBegin;
2371   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2372   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2373   PetscFunctionReturn(0);
2374 }
2375 EXTERN_C_END
2376 
2377 #include "petscpc.h"
2378 EXTERN_C_BEGIN
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2381 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2382 {
2383   Mat_MPIAIJ     *b;
2384   PetscErrorCode ierr;
2385   PetscInt       i;
2386 
2387   PetscFunctionBegin;
2388   B->preallocated = PETSC_TRUE;
2389   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2390   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2391   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2392   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2393 
2394   B->rmap.bs = B->cmap.bs = 1;
2395   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2396   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2397   if (d_nnz) {
2398     for (i=0; i<B->rmap.n; i++) {
2399       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2400     }
2401   }
2402   if (o_nnz) {
2403     for (i=0; i<B->rmap.n; i++) {
2404       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2405     }
2406   }
2407   b = (Mat_MPIAIJ*)B->data;
2408 
2409   /* Explicitly create 2 MATSEQAIJ matrices. */
2410   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2411   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2412   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2413   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2414   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2415   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2416   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2417   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2418 
2419   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2420   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2421 
2422   PetscFunctionReturn(0);
2423 }
2424 EXTERN_C_END
2425 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Creates a new parallel AIJ matrix with the same layout and options as
   matin; the cpvalues flag is forwarded to MatDuplicate() on the two
   sequential blocks, so values are copied only for MAT_COPY_VALUES.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the whole function table so any per-object overrides carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factor       = matin->factor;
  mat->rmap.bs      = matin->rmap.bs;
  /* the duplicate is born assembled with no values pending insertion */
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  a->donotstash     = oldmat->donotstash;
  a->roworiented    = oldmat->roworiented;
  /* MatGetRow() scratch space is created lazily, so start empty */
  a->rowindices     = 0;
  a->rowvalues      = 0;
  a->getrowactive   = PETSC_FALSE;

  ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
  /* duplicate the global-to-local column map of the off-diagonal block
     (a hash table with PETSC_USE_CTABLE, otherwise a length-N array) */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* duplicate garray, sized by the off-diagonal block's column count */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap.n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the communication objects and the two sequential blocks */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2489 
2490 #include "petscsys.h"
2491 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   Loads a matrix stored in PETSc binary format as a parallel AIJ matrix.

   Process 0 owns the file: it reads the header, the per-row nonzero
   counts, the column indices and the numerical values, keeps its own
   share and ships every other process its portion with point-to-point
   messages.  Each process derives its diagonal/off-diagonal
   preallocation from the incoming row lengths before inserting values.
*/
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
  PetscInt       i,nz,j,rstart,rend,mmax;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file; validate the binary header cookie */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: the first M%size ranks get one extra */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  /* NOTE(review): this scan stops before rowners[size]; harmless with the
     distribution formula above (rank 0 always has a maximal count), but
     confirm if the row distribution ever changes */
  if (!rank) {
    mmax       = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  /* turn the per-rank counts into prefix sums: rowners[r] is the first
     global row owned by rank r */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices; verify it has the expected size */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    /* square matrices reuse the row ownership ranges for the columns */
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily shrink ourlens to diagonal-only counts
     for preallocation, then restore the full lengths afterwards */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values; verify it has the expected size */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
2685 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* On reuse, recover the sequential submatrix stashed on the previous
     result (composed under the "SubMatrix" key below) and refill it */
  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    /* MatGetSubMatrices() allocates the output array; keep the single
       sequential matrix and free the array wrapper */
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    /* only the last rank sees the final running sum, so it checks it */
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;  /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* copy the rows of the sequential submatrix into the parallel matrix,
     walking the CSR arrays (ii,jj,aa) row by row */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2812 
2813 EXTERN_C_BEGIN
2814 #undef __FUNCT__
2815 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2816 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2817 {
2818   PetscInt       m,cstart, cend,j,nnz,i,d;
2819   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2820   const PetscInt *JJ;
2821   PetscScalar    *values;
2822   PetscErrorCode ierr;
2823 
2824   PetscFunctionBegin;
2825   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2826 
2827   B->rmap.bs = B->cmap.bs = 1;
2828   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2829   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2830   m      = B->rmap.n;
2831   cstart = B->cmap.rstart;
2832   cend   = B->cmap.rend;
2833   rstart = B->rmap.rstart;
2834 
2835   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2836   o_nnz = d_nnz + m;
2837 
2838 #if defined(PETSC_USE_DEBUGGING)
2839   for (i=0; i<m; i++) {
2840     nnz     = Ii[i+1]- Ii[i];
2841     JJ      = J + Ii[i];
2842     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2843     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
2844     if (nnz && (JJ[nnz-1] >= B->cmap.N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
2845     for (j=1; j<nnz; j++) {
2846       if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
2847     }
2848   }
2849 #endif
2850 
2851   for (i=0; i<m; i++) {
2852     nnz     = Ii[i+1]- Ii[i];
2853     JJ      = J + Ii[i];
2854     nnz_max = PetscMax(nnz_max,nnz);
2855     for (j=0; j<nnz; j++) {
2856       if (*JJ >= cstart) break;
2857       JJ++;
2858     }
2859     d = 0;
2860     for (; j<nnz; j++) {
2861       if (*JJ++ >= cend) break;
2862       d++;
2863     }
2864     d_nnz[i] = d;
2865     o_nnz[i] = nnz - d;
2866   }
2867   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2868   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2869 
2870   if (v) values = (PetscScalar*)v;
2871   else {
2872     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2873     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2874   }
2875 
2876   for (i=0; i<m; i++) {
2877     ii   = i + rstart;
2878     nnz  = Ii[i+1]- Ii[i];
2879     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2880   }
2881   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2882   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2883 
2884   if (!v) {
2885     ierr = PetscFree(values);CHKERRQ(ierr);
2886   }
2887   PetscFunctionReturn(0);
2888 }
2889 EXTERN_C_END
2890 
2891 #undef __FUNCT__
2892 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2893 /*@
2894    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2895    (the default parallel PETSc format).
2896 
2897    Collective on MPI_Comm
2898 
2899    Input Parameters:
2900 +  B - the matrix
2901 .  i - the indices into j for the start of each local row (starts with zero)
2902 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2903 -  v - optional values in the matrix
2904 
2905    Level: developer
2906 
2907    Notes:
2908        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2909      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2910      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2911 
2912        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2913 
2914        The format which is used for the sparse matrix input, is equivalent to a
2915     row-major ordering.. i.e for the following matrix, the input data expected is
2916     as shown:
2917 
2918         1 0 0
2919         2 0 3     P0
2920        -------
2921         4 5 6     P1
2922 
2923      Process0 [P0]: rows_owned=[0,1]
2924         i =  {0,1,3}  [size = nrow+1  = 2+1]
2925         j =  {0,0,2}  [size = 3]
2926         v =  {1,2,3}  [size = 3]
2927 
2928      Process1 [P1]: rows_owned=[2]
2929         i =  {0,3}    [size = nrow+1  = 1+1]
2930         j =  {0,1,2}  [size = nz = 6]
2931         v =  {4,5,6}  [size = nz = 6]
2932 
2933       The column indices for each row MUST be sorted.
2934 
2935 .keywords: matrix, aij, compressed row, sparse, parallel
2936 
2937 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2938           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2939 @*/
2940 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2941 {
2942   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2943 
2944   PetscFunctionBegin;
2945   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2946   if (f) {
2947     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2948   }
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2954 /*@C
2955    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2956    (the default parallel PETSc format).  For good matrix assembly performance
2957    the user should preallocate the matrix storage by setting the parameters
2958    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2959    performance can be increased by more than a factor of 50.
2960 
2961    Collective on MPI_Comm
2962 
2963    Input Parameters:
2964 +  A - the matrix
2965 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2966            (same value is used for all local rows)
2967 .  d_nnz - array containing the number of nonzeros in the various rows of the
2968            DIAGONAL portion of the local submatrix (possibly different for each row)
2969            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2970            The size of this array is equal to the number of local rows, i.e 'm'.
2971            You must leave room for the diagonal entry even if it is zero.
2972 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2973            submatrix (same value is used for all local rows).
2974 -  o_nnz - array containing the number of nonzeros in the various rows of the
2975            OFF-DIAGONAL portion of the local submatrix (possibly different for
2976            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2977            structure. The size of this array is equal to the number
2978            of local rows, i.e 'm'.
2979 
2980    If the *_nnz parameter is given then the *_nz parameter is ignored
2981 
2982    The AIJ format (also called the Yale sparse matrix format or
2983    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2984    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2985 
2986    The parallel matrix is partitioned such that the first m0 rows belong to
2987    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2988    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2989 
2990    The DIAGONAL portion of the local submatrix of a processor can be defined
2991    as the submatrix which is obtained by extracting the part corresponding
2992    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2993    first row that belongs to the processor, and r2 is the last row belonging
2994    to the this processor. This is a square mxm matrix. The remaining portion
2995    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2996 
2997    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2998 
2999    You can call MatGetInfo() to get information on how effective the preallocation was;
3000    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3001    You can also run with the option -info and look for messages with the string
3002    malloc in them to see if additional memory allocation was needed.
3003 
3004    Example usage:
3005 
3006    Consider the following 8x8 matrix with 34 non-zero values, that is
3007    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3008    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3009    as follows:
3010 
3011 .vb
3012             1  2  0  |  0  3  0  |  0  4
3013     Proc0   0  5  6  |  7  0  0  |  8  0
3014             9  0 10  | 11  0  0  | 12  0
3015     -------------------------------------
3016            13  0 14  | 15 16 17  |  0  0
3017     Proc1   0 18  0  | 19 20 21  |  0  0
3018             0  0  0  | 22 23  0  | 24  0
3019     -------------------------------------
3020     Proc2  25 26 27  |  0  0 28  | 29  0
3021            30  0  0  | 31 32 33  |  0 34
3022 .ve
3023 
3024    This can be represented as a collection of submatrices as:
3025 
3026 .vb
3027       A B C
3028       D E F
3029       G H I
3030 .ve
3031 
3032    Where the submatrices A,B,C are owned by proc0, D,E,F are
3033    owned by proc1, G,H,I are owned by proc2.
3034 
3035    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3036    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3037    The 'M','N' parameters are 8,8, and have the same values on all procs.
3038 
3039    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3040    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3041    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3042    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3043    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3044    matrix, and [DF] as another SeqAIJ matrix.
3045 
3046    When d_nz, o_nz parameters are specified, d_nz storage elements are
3047    allocated for every row of the local diagonal submatrix, and o_nz
3048    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3049    One way to choose d_nz and o_nz is to use the max nonzeros per local
3050    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3051    In this case, the values of d_nz,o_nz are:
3052 .vb
3053      proc0 : dnz = 2, o_nz = 2
3054      proc1 : dnz = 3, o_nz = 2
3055      proc2 : dnz = 1, o_nz = 4
3056 .ve
3057    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3058    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3059    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3060    34 values.
3061 
3062    When d_nnz, o_nnz parameters are specified, the storage is specified
3063    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3064    In the above case the values for d_nnz,o_nnz are:
3065 .vb
3066      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3067      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3068      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3069 .ve
3070    Here the space allocated is sum of all the above values i.e 34, and
3071    hence pre-allocation is perfect.
3072 
3073    Level: intermediate
3074 
3075 .keywords: matrix, aij, compressed row, sparse, parallel
3076 
3077 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3078           MPIAIJ, MatGetInfo()
3079 @*/
3080 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3081 {
3082   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3083 
3084   PetscFunctionBegin;
3085   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3086   if (f) {
3087     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3088   }
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 #undef __FUNCT__
3093 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3094 /*@
3095      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3096          CSR format the local rows.
3097 
3098    Collective on MPI_Comm
3099 
3100    Input Parameters:
3101 +  comm - MPI communicator
3102 .  m - number of local rows (Cannot be PETSC_DECIDE)
3103 .  n - This value should be the same as the local size used in creating the
3104        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3105        calculated if N is given) For square matrices n is almost always m.
3106 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3107 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3108 .   i - row indices
3109 .   j - column indices
3110 -   a - matrix values
3111 
3112    Output Parameter:
3113 .   mat - the matrix
3114 
3115    Level: intermediate
3116 
3117    Notes:
3118        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3119      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3120      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3121 
3122        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3123 
3124        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
3126     as shown:
3127 
3128         1 0 0
3129         2 0 3     P0
3130        -------
3131         4 5 6     P1
3132 
3133      Process0 [P0]: rows_owned=[0,1]
3134         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3137 
3138      Process1 [P1]: rows_owned=[2]
3139         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3142 
3143 .keywords: matrix, aij, compressed row, sparse, parallel
3144 
3145 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3146           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3147 @*/
3148 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3149 {
3150   PetscErrorCode ierr;
3151 
3152  PetscFunctionBegin;
3153   if (i[0]) {
3154     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3155   }
3156   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3157   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3158   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3159   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3160   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3161   PetscFunctionReturn(0);
3162 }
3163 
3164 #undef __FUNCT__
3165 #define __FUNCT__ "MatCreateMPIAIJ"
3166 /*@C
3167    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3168    (the default parallel PETSc format).  For good matrix assembly performance
3169    the user should preallocate the matrix storage by setting the parameters
3170    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3171    performance can be increased by more than a factor of 50.
3172 
3173    Collective on MPI_Comm
3174 
3175    Input Parameters:
3176 +  comm - MPI communicator
3177 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3178            This value should be the same as the local size used in creating the
3179            y vector for the matrix-vector product y = Ax.
3180 .  n - This value should be the same as the local size used in creating the
3181        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3182        calculated if N is given) For square matrices n is almost always m.
3183 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3184 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3185 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3186            (same value is used for all local rows)
3187 .  d_nnz - array containing the number of nonzeros in the various rows of the
3188            DIAGONAL portion of the local submatrix (possibly different for each row)
3189            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3190            The size of this array is equal to the number of local rows, i.e 'm'.
3191            You must leave room for the diagonal entry even if it is zero.
3192 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3193            submatrix (same value is used for all local rows).
3194 -  o_nnz - array containing the number of nonzeros in the various rows of the
3195            OFF-DIAGONAL portion of the local submatrix (possibly different for
3196            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3197            structure. The size of this array is equal to the number
3198            of local rows, i.e 'm'.
3199 
3200    Output Parameter:
3201 .  A - the matrix
3202 
3203    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
3205    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3206    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3207 
3208    Notes:
3209    If the *_nnz parameter is given then the *_nz parameter is ignored
3210 
3211    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3212    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3213    storage requirements for this matrix.
3214 
3215    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3216    processor than it must be used on all processors that share the object for
3217    that argument.
3218 
3219    The user MUST specify either the local or global matrix dimensions
3220    (possibly both).
3221 
3222    The parallel matrix is partitioned across processors such that the
3223    first m0 rows belong to process 0, the next m1 rows belong to
3224    process 1, the next m2 rows belong to process 2 etc.. where
3225    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3226    values corresponding to [m x N] submatrix.
3227 
3228    The columns are logically partitioned with the n0 columns belonging
3229    to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.
3231 
3232    The DIAGONAL portion of the local submatrix on any given processor
3233    is the submatrix corresponding to the rows and columns m,n
3234    corresponding to the given processor. i.e diagonal matrix on
3235    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3236    etc. The remaining portion of the local submatrix [m x (N-n)]
3237    constitute the OFF-DIAGONAL portion. The example below better
3238    illustrates this concept.
3239 
3240    For a square global matrix we define each processor's diagonal portion
3241    to be its local rows and the corresponding columns (a square submatrix);
3242    each processor's off-diagonal portion encompasses the remainder of the
3243    local matrix (a rectangular submatrix).
3244 
3245    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3246 
3247    When calling this routine with a single process communicator, a matrix of
3248    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3249    type of communicator, use the construction mechanism:
3250      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3251 
3252    By default, this format uses inodes (identical nodes) when possible.
3253    We search for consecutive rows with the same nonzero structure, thereby
3254    reusing matrix information to achieve increased efficiency.
3255 
3256    Options Database Keys:
3257 +  -mat_no_inode  - Do not use inodes
3258 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3259 -  -mat_aij_oneindex - Internally use indexing starting at 1
3260         rather than 0.  Note that when calling MatSetValues(),
3261         the user still MUST index entries starting at 0!
3262 
3263 
3264    Example usage:
3265 
3266    Consider the following 8x8 matrix with 34 non-zero values, that is
3267    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3268    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3269    as follows:
3270 
3271 .vb
3272             1  2  0  |  0  3  0  |  0  4
3273     Proc0   0  5  6  |  7  0  0  |  8  0
3274             9  0 10  | 11  0  0  | 12  0
3275     -------------------------------------
3276            13  0 14  | 15 16 17  |  0  0
3277     Proc1   0 18  0  | 19 20 21  |  0  0
3278             0  0  0  | 22 23  0  | 24  0
3279     -------------------------------------
3280     Proc2  25 26 27  |  0  0 28  | 29  0
3281            30  0  0  | 31 32 33  |  0 34
3282 .ve
3283 
3284    This can be represented as a collection of submatrices as:
3285 
3286 .vb
3287       A B C
3288       D E F
3289       G H I
3290 .ve
3291 
3292    Where the submatrices A,B,C are owned by proc0, D,E,F are
3293    owned by proc1, G,H,I are owned by proc2.
3294 
3295    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3296    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3297    The 'M','N' parameters are 8,8, and have the same values on all procs.
3298 
3299    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3300    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3301    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3302    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3303    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3305 
3306    When d_nz, o_nz parameters are specified, d_nz storage elements are
3307    allocated for every row of the local diagonal submatrix, and o_nz
3308    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3311    In this case, the values of d_nz,o_nz are:
3312 .vb
3313      proc0 : dnz = 2, o_nz = 2
3314      proc1 : dnz = 3, o_nz = 2
3315      proc2 : dnz = 1, o_nz = 4
3316 .ve
3317    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3318    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
3320    34 values.
3321 
3322    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3324    In the above case the values for d_nnz,o_nnz are:
3325 .vb
3326      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3327      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3328      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3329 .ve
3330    Here the space allocated is sum of all the above values i.e 34, and
3331    hence pre-allocation is perfect.
3332 
3333    Level: intermediate
3334 
3335 .keywords: matrix, aij, compressed row, sparse, parallel
3336 
3337 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3338           MPIAIJ, MatCreateMPIAIJWithArrays()
3339 @*/
3340 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3341 {
3342   PetscErrorCode ierr;
3343   PetscMPIInt    size;
3344 
3345   PetscFunctionBegin;
3346   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3347   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3348   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3349   if (size > 1) {
3350     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3351     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3352   } else {
3353     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3354     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3355   }
3356   PetscFunctionReturn(0);
3357 }
3358 
3359 #undef __FUNCT__
3360 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3361 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3362 {
3363   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3364 
3365   PetscFunctionBegin;
3366   *Ad     = a->A;
3367   *Ao     = a->B;
3368   *colmap = a->garray;
3369   PetscFunctionReturn(0);
3370 }
3371 
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/* Attach a coloring to an MPIAIJ matrix by splitting it into a coloring for
   the diagonal SeqAIJ block (a->A) and one for the off-diagonal block (a->B).
   Note: ISColoringCreate() takes ownership of the 'colors' arrays allocated
   below, so they are not freed here. */
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    /* coloring is given in global numbering: the diagonal block can use it
       directly, but the off-diagonal block needs colors of columns owned by
       other processes, hence the allgather below */
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    /* gather the colors of ALL global columns, then pick out the ones this
       process references via garray (global column numbers of B's columns) */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    /* coloring is given in local (ghosted) numbering: translate both blocks'
       column indices to local indices via the matrix's global-to-local map */
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    /* build the global indices of the locally owned columns, then map them
       into the ghosted local numbering used by coloring->colors */
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      larray[i] = i + A->cmap.rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    /* garray already holds B's global column numbers; map them to local */
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}
3435 
#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forward ADIC-differentiated values into both local blocks of the MPIAIJ
   matrix: the diagonal block first, then the off-diagonal block. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(mpiaij->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(mpiaij->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
3450 
3451 #undef __FUNCT__
3452 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3453 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3454 {
3455   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3456   PetscErrorCode ierr;
3457 
3458   PetscFunctionBegin;
3459   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3460   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3461   PetscFunctionReturn(0);
3462 }
3463 
3464 #undef __FUNCT__
3465 #define __FUNCT__ "MatMerge"
3466 /*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
3468                  matrices from each processor
3469 
3470     Collective on MPI_Comm
3471 
3472    Input Parameters:
3473 +    comm - the communicators the parallel matrix will live on
3474 .    inmat - the input sequential matrices
3475 .    n - number of local columns (or PETSC_DECIDE)
3476 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3477 
3478    Output Parameter:
3479 .    outmat - the parallel matrix generated
3480 
3481     Level: advanced
3482 
3483    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3484 
3485 @*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  /* inmat is sequential (it is accessed via MatGetRow_SeqAIJ below), so
     MatGetSize() yields its full dimensions: m local rows, N columns */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* prefix-sum of local row counts gives this process's first global row */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* MatPreallocateInitialize/Finalize are macros: they allocate and free
       the dnz/onz arrays and open/close a scope around this block */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    /* reuse path: *outmat already exists, only its row offset is needed */
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* copy each local row of inmat into its global position in *outmat */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii    = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* ownership transfer: this routine consumes (destroys) the input matrix */
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
3534 
3535 #undef __FUNCT__
3536 #define __FUNCT__ "MatFileSplit"
3537 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3538 {
3539   PetscErrorCode    ierr;
3540   PetscMPIInt       rank;
3541   PetscInt          m,N,i,rstart,nnz;
3542   size_t            len;
3543   const PetscInt    *indx;
3544   PetscViewer       out;
3545   char              *name;
3546   Mat               B;
3547   const PetscScalar *values;
3548 
3549   PetscFunctionBegin;
3550   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3551   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3552   /* Should this be the type of the diagonal block of A? */
3553   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3554   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3555   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3556   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3557   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3558   for (i=0;i<m;i++) {
3559     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3560     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3561     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3562   }
3563   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3564   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3565 
3566   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3567   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3568   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3569   sprintf(name,"%s.%d",outfile,rank);
3570   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3571   ierr = PetscFree(name);
3572   ierr = MatView(B,out);CHKERRQ(ierr);
3573   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3574   ierr = MatDestroy(B);CHKERRQ(ierr);
3575   PetscFunctionReturn(0);
3576 }
3577 
3578 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3579 #undef __FUNCT__
3580 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3581 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3582 {
3583   PetscErrorCode       ierr;
3584   Mat_Merge_SeqsToMPI  *merge;
3585   PetscContainer       container;
3586 
3587   PetscFunctionBegin;
3588   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3589   if (container) {
3590     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3591     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3592     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3593     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3594     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3595     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3596     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3597     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3598     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3599     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3600     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3601     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3602 
3603     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3604     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3605   }
3606   ierr = PetscFree(merge);CHKERRQ(ierr);
3607 
3608   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3609   PetscFunctionReturn(0);
3610 }
3611 
3612 #include "src/mat/utils/freespace.h"
3613 #include "petscbt.h"
3614 
3615 #undef __FUNCT__
3616 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3617 /*@C
3618       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3619                  matrices from each processor
3620 
3621     Collective on MPI_Comm
3622 
3623    Input Parameters:
3624 +    comm - the communicators the parallel matrix will live on
3625 .    seqmat - the input sequential matrices
3626 .    m - number of local rows (or PETSC_DECIDE)
3627 .    n - number of local columns (or PETSC_DECIDE)
3628 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3629 
3630    Output Parameter:
3631 .    mpimat - the parallel matrix generated
3632 
3633     Level: advanced
3634 
3635    Notes:
3636      The dimensions of the sequential matrix in each processor MUST be the same.
3637      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3638      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3639 @*/
3640 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3641 {
3642   PetscErrorCode       ierr;
3643   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3644   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3645   PetscMPIInt          size,rank,taga,*len_s;
3646   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3647   PetscInt             proc,m;
3648   PetscInt             **buf_ri,**buf_rj;
3649   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3650   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3651   MPI_Request          *s_waits,*r_waits;
3652   MPI_Status           *status;
3653   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3654   Mat_Merge_SeqsToMPI  *merge;
3655   PetscContainer       container;
3656 
3657   PetscFunctionBegin;
3658   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3659 
3660   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3661   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3662 
3663   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3664   if (container) {
3665     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3666   }
3667   bi     = merge->bi;
3668   bj     = merge->bj;
3669   buf_ri = merge->buf_ri;
3670   buf_rj = merge->buf_rj;
3671 
3672   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3673   owners = merge->rowmap.range;
3674   len_s  = merge->len_s;
3675 
3676   /* send and recv matrix values */
3677   /*-----------------------------*/
3678   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3679   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3680 
3681   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3682   for (proc=0,k=0; proc<size; proc++){
3683     if (!len_s[proc]) continue;
3684     i = owners[proc];
3685     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3686     k++;
3687   }
3688 
3689   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3690   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3691   ierr = PetscFree(status);CHKERRQ(ierr);
3692 
3693   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3694   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3695 
3696   /* insert mat values of mpimat */
3697   /*----------------------------*/
3698   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3699   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3700   nextrow = buf_ri_k + merge->nrecv;
3701   nextai  = nextrow + merge->nrecv;
3702 
3703   for (k=0; k<merge->nrecv; k++){
3704     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3705     nrows = *(buf_ri_k[k]);
3706     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3707     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3708   }
3709 
3710   /* set values of ba */
3711   m = merge->rowmap.n;
3712   for (i=0; i<m; i++) {
3713     arow = owners[rank] + i;
3714     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3715     bnzi = bi[i+1] - bi[i];
3716     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3717 
3718     /* add local non-zero vals of this proc's seqmat into ba */
3719     anzi = ai[arow+1] - ai[arow];
3720     aj   = a->j + ai[arow];
3721     aa   = a->a + ai[arow];
3722     nextaj = 0;
3723     for (j=0; nextaj<anzi; j++){
3724       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3725         ba_i[j] += aa[nextaj++];
3726       }
3727     }
3728 
3729     /* add received vals into ba */
3730     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3731       /* i-th row */
3732       if (i == *nextrow[k]) {
3733         anzi = *(nextai[k]+1) - *nextai[k];
3734         aj   = buf_rj[k] + *(nextai[k]);
3735         aa   = abuf_r[k] + *(nextai[k]);
3736         nextaj = 0;
3737         for (j=0; nextaj<anzi; j++){
3738           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3739             ba_i[j] += aa[nextaj++];
3740           }
3741         }
3742         nextrow[k]++; nextai[k]++;
3743       }
3744     }
3745     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3746   }
3747   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3748   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3749 
3750   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3751   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3752   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3753   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3754   PetscFunctionReturn(0);
3755 }
3756 
3757 #undef __FUNCT__
3758 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3759 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3760 {
3761   PetscErrorCode       ierr;
3762   Mat                  B_mpi;
3763   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3764   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3765   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3766   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3767   PetscInt             len,proc,*dnz,*onz;
3768   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3769   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3770   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3771   MPI_Status           *status;
3772   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3773   PetscBT              lnkbt;
3774   Mat_Merge_SeqsToMPI  *merge;
3775   PetscContainer       container;
3776 
3777   PetscFunctionBegin;
3778   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3779 
3780   /* make sure it is a PETSc comm */
3781   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3782   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3783   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3784 
3785   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3786   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3787 
3788   /* determine row ownership */
3789   /*---------------------------------------------------------*/
3790   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3791   merge->rowmap.n = m;
3792   merge->rowmap.N = M;
3793   merge->rowmap.bs = 1;
3794   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3795   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3796   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3797 
3798   m      = merge->rowmap.n;
3799   M      = merge->rowmap.N;
3800   owners = merge->rowmap.range;
3801 
3802   /* determine the number of messages to send, their lengths */
3803   /*---------------------------------------------------------*/
3804   len_s  = merge->len_s;
3805 
3806   len = 0;  /* length of buf_si[] */
3807   merge->nsend = 0;
3808   for (proc=0; proc<size; proc++){
3809     len_si[proc] = 0;
3810     if (proc == rank){
3811       len_s[proc] = 0;
3812     } else {
3813       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3814       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3815     }
3816     if (len_s[proc]) {
3817       merge->nsend++;
3818       nrows = 0;
3819       for (i=owners[proc]; i<owners[proc+1]; i++){
3820         if (ai[i+1] > ai[i]) nrows++;
3821       }
3822       len_si[proc] = 2*(nrows+1);
3823       len += len_si[proc];
3824     }
3825   }
3826 
3827   /* determine the number and length of messages to receive for ij-structure */
3828   /*-------------------------------------------------------------------------*/
3829   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3830   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3831 
3832   /* post the Irecv of j-structure */
3833   /*-------------------------------*/
3834   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3835   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3836 
3837   /* post the Isend of j-structure */
3838   /*--------------------------------*/
3839   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3840   sj_waits = si_waits + merge->nsend;
3841 
3842   for (proc=0, k=0; proc<size; proc++){
3843     if (!len_s[proc]) continue;
3844     i = owners[proc];
3845     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3846     k++;
3847   }
3848 
3849   /* receives and sends of j-structure are complete */
3850   /*------------------------------------------------*/
3851   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3852   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3853 
3854   /* send and recv i-structure */
3855   /*---------------------------*/
3856   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3857   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3858 
3859   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3860   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3861   for (proc=0,k=0; proc<size; proc++){
3862     if (!len_s[proc]) continue;
3863     /* form outgoing message for i-structure:
3864          buf_si[0]:                 nrows to be sent
3865                [1:nrows]:           row index (global)
3866                [nrows+1:2*nrows+1]: i-structure index
3867     */
3868     /*-------------------------------------------*/
3869     nrows = len_si[proc]/2 - 1;
3870     buf_si_i    = buf_si + nrows+1;
3871     buf_si[0]   = nrows;
3872     buf_si_i[0] = 0;
3873     nrows = 0;
3874     for (i=owners[proc]; i<owners[proc+1]; i++){
3875       anzi = ai[i+1] - ai[i];
3876       if (anzi) {
3877         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3878         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3879         nrows++;
3880       }
3881     }
3882     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3883     k++;
3884     buf_si += len_si[proc];
3885   }
3886 
3887   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3888   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3889 
3890   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3891   for (i=0; i<merge->nrecv; i++){
3892     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3893   }
3894 
3895   ierr = PetscFree(len_si);CHKERRQ(ierr);
3896   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3897   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3898   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3899   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3900   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3901   ierr = PetscFree(status);CHKERRQ(ierr);
3902 
3903   /* compute a local seq matrix in each processor */
3904   /*----------------------------------------------*/
3905   /* allocate bi array and free space for accumulating nonzero column info */
3906   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3907   bi[0] = 0;
3908 
3909   /* create and initialize a linked list */
3910   nlnk = N+1;
3911   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3912 
3913   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3914   len = 0;
3915   len  = ai[owners[rank+1]] - ai[owners[rank]];
3916   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3917   current_space = free_space;
3918 
3919   /* determine symbolic info for each local row */
3920   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3921   nextrow = buf_ri_k + merge->nrecv;
3922   nextai  = nextrow + merge->nrecv;
3923   for (k=0; k<merge->nrecv; k++){
3924     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3925     nrows = *buf_ri_k[k];
3926     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3927     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3928   }
3929 
3930   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3931   len = 0;
3932   for (i=0;i<m;i++) {
3933     bnzi   = 0;
3934     /* add local non-zero cols of this proc's seqmat into lnk */
3935     arow   = owners[rank] + i;
3936     anzi   = ai[arow+1] - ai[arow];
3937     aj     = a->j + ai[arow];
3938     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3939     bnzi += nlnk;
3940     /* add received col data into lnk */
3941     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3942       if (i == *nextrow[k]) { /* i-th row */
3943         anzi = *(nextai[k]+1) - *nextai[k];
3944         aj   = buf_rj[k] + *nextai[k];
3945         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3946         bnzi += nlnk;
3947         nextrow[k]++; nextai[k]++;
3948       }
3949     }
3950     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3951 
3952     /* if free space is not available, make more free space */
3953     if (current_space->local_remaining<bnzi) {
3954       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3955       nspacedouble++;
3956     }
3957     /* copy data into free space, then initialize lnk */
3958     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3959     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3960 
3961     current_space->array           += bnzi;
3962     current_space->local_used      += bnzi;
3963     current_space->local_remaining -= bnzi;
3964 
3965     bi[i+1] = bi[i] + bnzi;
3966   }
3967 
3968   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3969 
3970   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3971   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3972   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3973 
3974   /* create symbolic parallel matrix B_mpi */
3975   /*---------------------------------------*/
3976   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3977   if (n==PETSC_DECIDE) {
3978     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3979   } else {
3980     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3981   }
3982   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3983   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3984   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3985 
3986   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3987   B_mpi->assembled     = PETSC_FALSE;
3988   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3989   merge->bi            = bi;
3990   merge->bj            = bj;
3991   merge->buf_ri        = buf_ri;
3992   merge->buf_rj        = buf_rj;
3993   merge->coi           = PETSC_NULL;
3994   merge->coj           = PETSC_NULL;
3995   merge->owners_co     = PETSC_NULL;
3996 
3997   /* attach the supporting struct to B_mpi for reuse */
3998   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3999   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4000   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4001   *mpimat = B_mpi;
4002 
4003   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4004   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4005   PetscFunctionReturn(0);
4006 }
4007 
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/*
   MatMerge_SeqsToMPI - driver that merges per-process SeqAIJ matrices into one
   parallel MPIAIJ matrix *mpimat in two phases:
     symbolic (structure/preallocation) + numeric (value assembly).
   For MAT_INITIAL_MATRIX both phases run; for reuse the previously built
   *mpimat (with its attached Mat_Merge_SeqsToMPI support data) is refilled
   by the numeric phase only.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  /* the symbolic phase creates *mpimat; it is only needed the first time through */
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4023 
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
  /* a = diagonal block, b = off-diagonal block of the MPIAIJ matrix */
  Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  PetscScalar     *aa=a->a,*ba=b->a,*ca;
  PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
  PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row i of the result holds all entries of row i from both blocks */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    /* Merge each row so the global column indices come out sorted:
       off-diagonal cols < cstart, then the diagonal block, then the rest.
       NOTE(review): assumes cmap[] (garray) maps bj's local indices to
       ascending global columns within each row -- confirm against
       MatAssemblyEnd_MPIAIJ. */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A with global column < cstart */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: local column + cstart = global column */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (global column >= cstart) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* structure is unchanged: only overwrite the values, in the same merge order */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; ca = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A with global column < cstart */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *ca++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *ca++ = *aa++;
      /* remaining off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *ca++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4117 
4118 #undef __FUNCT__
4119 #define __FUNCT__ "MatGetLocalMatCondensed"
4120 /*@C
4121      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4122 
4123     Not Collective
4124 
4125    Input Parameters:
4126 +    A - the matrix
4127 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4128 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4129 
4130    Output Parameter:
4131 .    A_loc - the local sequential matrix generated
4132 
4133     Level: developer
4134 
4135 @*/
4136 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4137 {
4138   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4139   PetscErrorCode    ierr;
4140   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4141   IS                isrowa,iscola;
4142   Mat               *aloc;
4143 
4144   PetscFunctionBegin;
4145   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4146   if (!row){
4147     start = A->rmap.rstart; end = A->rmap.rend;
4148     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4149   } else {
4150     isrowa = *row;
4151   }
4152   if (!col){
4153     start = A->cmap.rstart;
4154     cmap  = a->garray;
4155     nzA   = a->A->cmap.n;
4156     nzB   = a->B->cmap.n;
4157     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4158     ncols = 0;
4159     for (i=0; i<nzB; i++) {
4160       if (cmap[i] < start) idx[ncols++] = cmap[i];
4161       else break;
4162     }
4163     imark = i;
4164     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4165     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4166     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4167     ierr = PetscFree(idx);CHKERRQ(ierr);
4168   } else {
4169     iscola = *col;
4170   }
4171   if (scall != MAT_INITIAL_MATRIX){
4172     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4173     aloc[0] = *A_loc;
4174   }
4175   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4176   *A_loc = aloc[0];
4177   ierr = PetscFree(aloc);CHKERRQ(ierr);
4178   if (!row){
4179     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4180   }
4181   if (!col){
4182     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4183   }
4184   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4185   PetscFunctionReturn(0);
4186 }
4187 
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS                isrowb,iscolb;
  Mat               *bseq;

  PetscFunctionBegin;
  /* for A*B the column layout of A must match the row layout of B */
  if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* Rows of B to fetch = nonzero columns of local A, in ascending global
       order: off-diagonal columns below the diagonal block, the diagonal
       block itself, then the remaining off-diagonal columns.
       NOTE(review): the single break assumes cmap[] (garray) is sorted
       ascending -- confirm against MatAssemblyEnd_MPIAIJ. */
    start = A->cmap.rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap.n;
    nzB   = a->B->cmap.n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    /* local rows of B begin at offset imark inside B_seq */
    *brstart = imark;
    /* all columns of B are kept */
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    /* NOTE(review): *brstart is NOT set on this path; callers reusing must
       retain the value from the MAT_INITIAL_MATRIX call -- confirm. */
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back if requested, otherwise destroy the temporaries */
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4262 
4263 #undef __FUNCT__
4264 #define __FUNCT__ "MatGetBrowsOfAoCols"
4265 /*@C
4266     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4267     of the OFF-DIAGONAL portion of local A
4268 
4269     Collective on Mat
4270 
4271    Input Parameters:
4272 +    A,B - the matrices in mpiaij format
4273 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4274 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4275 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4276 
4277    Output Parameter:
4278 +    B_oth - the sequential matrix generated
4279 
4280     Level: developer
4281 
4282 @*/
4283 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4284 {
4285   VecScatter_MPI_General *gen_to,*gen_from;
4286   PetscErrorCode         ierr;
4287   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4288   Mat_SeqAIJ             *b_oth;
4289   VecScatter             ctx=a->Mvctx;
4290   MPI_Comm               comm=((PetscObject)ctx)->comm;
4291   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4292   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4293   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4294   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4295   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4296   MPI_Status             *sstatus,rstatus;
4297   PetscMPIInt            jj;
4298   PetscInt               *cols,sbs,rbs;
4299   PetscScalar            *vals;
4300 
4301   PetscFunctionBegin;
4302   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4303     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4304   }
4305   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4306   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4307 
4308   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4309   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4310   rvalues  = gen_from->values; /* holds the length of receiving row */
4311   svalues  = gen_to->values;   /* holds the length of sending row */
4312   nrecvs   = gen_from->n;
4313   nsends   = gen_to->n;
4314 
4315   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4316   srow     = gen_to->indices;   /* local row index to be sent */
4317   sstarts  = gen_to->starts;
4318   sprocs   = gen_to->procs;
4319   sstatus  = gen_to->sstatus;
4320   sbs      = gen_to->bs;
4321   rstarts  = gen_from->starts;
4322   rprocs   = gen_from->procs;
4323   rbs      = gen_from->bs;
4324 
4325   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4326   if (scall == MAT_INITIAL_MATRIX){
4327     /* i-array */
4328     /*---------*/
4329     /*  post receives */
4330     for (i=0; i<nrecvs; i++){
4331       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4332       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4333       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4334     }
4335 
4336     /* pack the outgoing message */
4337     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4338     rstartsj = sstartsj + nsends +1;
4339     sstartsj[0] = 0;  rstartsj[0] = 0;
4340     len = 0; /* total length of j or a array to be sent */
4341     k = 0;
4342     for (i=0; i<nsends; i++){
4343       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4344       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4345       for (j=0; j<nrows; j++) {
4346         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4347         for (l=0; l<sbs; l++){
4348           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4349           rowlen[j*sbs+l] = ncols;
4350           len += ncols;
4351           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4352         }
4353         k++;
4354       }
4355       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4356       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4357     }
4358     /* recvs and sends of i-array are completed */
4359     i = nrecvs;
4360     while (i--) {
4361       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4362     }
4363     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4364 
4365     /* allocate buffers for sending j and a arrays */
4366     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4367     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4368 
4369     /* create i-array of B_oth */
4370     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4371     b_othi[0] = 0;
4372     len = 0; /* total length of j or a array to be received */
4373     k = 0;
4374     for (i=0; i<nrecvs; i++){
4375       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4376       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4377       for (j=0; j<nrows; j++) {
4378         b_othi[k+1] = b_othi[k] + rowlen[j];
4379         len += rowlen[j]; k++;
4380       }
4381       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4382     }
4383 
4384     /* allocate space for j and a arrrays of B_oth */
4385     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4386     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4387 
4388     /* j-array */
4389     /*---------*/
4390     /*  post receives of j-array */
4391     for (i=0; i<nrecvs; i++){
4392       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4393       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4394     }
4395 
4396     /* pack the outgoing message j-array */
4397     k = 0;
4398     for (i=0; i<nsends; i++){
4399       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4400       bufJ = bufj+sstartsj[i];
4401       for (j=0; j<nrows; j++) {
4402         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4403         for (ll=0; ll<sbs; ll++){
4404           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4405           for (l=0; l<ncols; l++){
4406             *bufJ++ = cols[l];
4407           }
4408           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4409         }
4410       }
4411       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4412     }
4413 
4414     /* recvs and sends of j-array are completed */
4415     i = nrecvs;
4416     while (i--) {
4417       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4418     }
4419     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4420   } else if (scall == MAT_REUSE_MATRIX){
4421     sstartsj = *startsj;
4422     rstartsj = sstartsj + nsends +1;
4423     bufa     = *bufa_ptr;
4424     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4425     b_otha   = b_oth->a;
4426   } else {
4427     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4428   }
4429 
4430   /* a-array */
4431   /*---------*/
4432   /*  post receives of a-array */
4433   for (i=0; i<nrecvs; i++){
4434     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4435     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4436   }
4437 
4438   /* pack the outgoing message a-array */
4439   k = 0;
4440   for (i=0; i<nsends; i++){
4441     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4442     bufA = bufa+sstartsj[i];
4443     for (j=0; j<nrows; j++) {
4444       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4445       for (ll=0; ll<sbs; ll++){
4446         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4447         for (l=0; l<ncols; l++){
4448           *bufA++ = vals[l];
4449         }
4450         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4451       }
4452     }
4453     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4454   }
4455   /* recvs and sends of a-array are completed */
4456   i = nrecvs;
4457   while (i--) {
4458     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4459   }
4460   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4461   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4462 
4463   if (scall == MAT_INITIAL_MATRIX){
4464     /* put together the new matrix */
4465     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4466 
4467     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4468     /* Since these are PETSc arrays, change flags to free them as necessary. */
4469     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4470     b_oth->free_a  = PETSC_TRUE;
4471     b_oth->free_ij = PETSC_TRUE;
4472     b_oth->nonew   = 0;
4473 
4474     ierr = PetscFree(bufj);CHKERRQ(ierr);
4475     if (!startsj || !bufa_ptr){
4476       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4477       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4478     } else {
4479       *startsj  = sstartsj;
4480       *bufa_ptr = bufa;
4481     }
4482   }
4483   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4484   PetscFunctionReturn(0);
4485 }
4486 
4487 #undef __FUNCT__
4488 #define __FUNCT__ "MatGetCommunicationStructs"
4489 /*@C
4490   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4491 
4492   Not Collective
4493 
4494   Input Parameters:
4495 . A - The matrix in mpiaij format
4496 
4497   Output Parameter:
4498 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4499 . colmap - A map from global column index to local index into lvec
4500 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4501 
4502   Level: developer
4503 
4504 @*/
4505 #if defined (PETSC_USE_CTABLE)
4506 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4507 #else
4508 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4509 #endif
4510 {
4511   Mat_MPIAIJ *a;
4512 
4513   PetscFunctionBegin;
4514   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4515   PetscValidPointer(lvec, 2)
4516   PetscValidPointer(colmap, 3)
4517   PetscValidPointer(multScatter, 4)
4518   a = (Mat_MPIAIJ *) A->data;
4519   if (lvec) *lvec = a->lvec;
4520   if (colmap) *colmap = a->colmap;
4521   if (multScatter) *multScatter = a->Mvctx;
4522   PetscFunctionReturn(0);
4523 }
4524 
4525 EXTERN_C_BEGIN
4526 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4527 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4528 EXTERN_C_END
4529 
4530 /*MC
4531    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4532 
4533    Options Database Keys:
4534 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4535 
4536   Level: beginner
4537 
4538 .seealso: MatCreateMPIAIJ()
4539 M*/
4540 
4541 EXTERN_C_BEGIN
4542 #undef __FUNCT__
4543 #define __FUNCT__ "MatCreate_MPIAIJ"
4544 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4545 {
4546   Mat_MPIAIJ     *b;
4547   PetscErrorCode ierr;
4548   PetscMPIInt    size;
4549 
4550   PetscFunctionBegin;
4551   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4552 
4553   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4554   B->data         = (void*)b;
4555   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4556   B->factor       = 0;
4557   B->rmap.bs      = 1;
4558   B->assembled    = PETSC_FALSE;
4559   B->mapping      = 0;
4560 
4561   B->insertmode      = NOT_SET_VALUES;
4562   b->size            = size;
4563   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4564 
4565   /* build cache for off array entries formed */
4566   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4567   b->donotstash  = PETSC_FALSE;
4568   b->colmap      = 0;
4569   b->garray      = 0;
4570   b->roworiented = PETSC_TRUE;
4571 
4572   /* stuff used for matrix vector multiply */
4573   b->lvec      = PETSC_NULL;
4574   b->Mvctx     = PETSC_NULL;
4575 
4576   /* stuff for MatGetRow() */
4577   b->rowindices   = 0;
4578   b->rowvalues    = 0;
4579   b->getrowactive = PETSC_FALSE;
4580 
4581 
4582   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4583                                      "MatStoreValues_MPIAIJ",
4584                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4585   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4586                                      "MatRetrieveValues_MPIAIJ",
4587                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4588   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4589 				     "MatGetDiagonalBlock_MPIAIJ",
4590                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4591   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4592 				     "MatIsTranspose_MPIAIJ",
4593 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4595 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4596 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4597   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4598 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4599 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4600   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4601 				     "MatDiagonalScaleLocal_MPIAIJ",
4602 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4604                                      "MatConvert_MPIAIJ_MPICSRPERM",
4605                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4607                                      "MatConvert_MPIAIJ_MPICRL",
4608                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4609   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4610   PetscFunctionReturn(0);
4611 }
4612 EXTERN_C_END
4613 
4614 #undef __FUNCT__
4615 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4616 /*@
4617      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4618          and "off-diagonal" part of the matrix in CSR format.
4619 
4620    Collective on MPI_Comm
4621 
4622    Input Parameters:
4623 +  comm - MPI communicator
4624 .  m - number of local rows (Cannot be PETSC_DECIDE)
4625 .  n - This value should be the same as the local size used in creating the
4626        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4627        calculated if N is given) For square matrices n is almost always m.
4628 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4629 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4630 .   i - row indices for "diagonal" portion of matrix
4631 .   j - column indices
4632 .   a - matrix values
4633 .   oi - row indices for "off-diagonal" portion of matrix
4634 .   oj - column indices
4635 -   oa - matrix values
4636 
4637    Output Parameter:
4638 .   mat - the matrix
4639 
4640    Level: advanced
4641 
4642    Notes:
4643        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4644 
4645        The i and j indices are 0 based
4646 
4647        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4648 
4649 
4650 .keywords: matrix, aij, compressed row, sparse, parallel
4651 
4652 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4653           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4654 @*/
4655 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4656 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4657 {
4658   PetscErrorCode ierr;
4659   Mat_MPIAIJ     *maij;
4660 
4661  PetscFunctionBegin;
4662   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4663   if (i[0]) {
4664     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4665   }
4666   if (oi[0]) {
4667     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4668   }
4669   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4670   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4671   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4672   maij = (Mat_MPIAIJ*) (*mat)->data;
4673   maij->donotstash     = PETSC_TRUE;
4674   (*mat)->preallocated = PETSC_TRUE;
4675 
4676   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4677   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4678   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4679 
4680   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4681   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4682 
4683   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4684   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4685   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4686   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4687 
4688   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4689   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4690   PetscFunctionReturn(0);
4691 }
4692 
4693 /*
4694     Special version for direct calls from Fortran
4695 */
4696 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4697 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4698 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4699 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4700 #endif
4701 
4702 /* Redefine the error-handling macros so they are usable in a function returning
     void: instead of returning an error code they abort on the matrix communicator. */
4703 #undef CHKERRQ
4704 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4705 #undef SETERRQ2
4706 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4707 #undef SETERRQ
4708 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4709 
4710 EXTERN_C_BEGIN
4711 #undef __FUNCT__
4712 #define __FUNCT__ "matsetvaluesmpiaij_"
4713 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4714 {
4715   Mat            mat = *mmat;
4716   PetscInt       m = *mm, n = *mn;
4717   InsertMode     addv = *maddv;
4718   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4719   PetscScalar    value;
4720   PetscErrorCode ierr;
4721 
4722   MatPreallocated(mat);
4723   if (mat->insertmode == NOT_SET_VALUES) {
4724     mat->insertmode = addv;
4725   }
4726 #if defined(PETSC_USE_DEBUG)
4727   else if (mat->insertmode != addv) {
4728     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4729   }
4730 #endif
4731   {
4732   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4733   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4734   PetscTruth     roworiented = aij->roworiented;
4735 
4736   /* Some Variables required in the macro */
4737   Mat            A = aij->A;
4738   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4739   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4740   PetscScalar    *aa = a->a;
4741   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4742   Mat            B = aij->B;
4743   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4744   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4745   PetscScalar    *ba = b->a;
4746 
4747   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4748   PetscInt       nonew = a->nonew;
4749   PetscScalar    *ap1,*ap2;
4750 
4751   PetscFunctionBegin;
4752   for (i=0; i<m; i++) {
4753     if (im[i] < 0) continue;
4754 #if defined(PETSC_USE_DEBUG)
4755     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4756 #endif
4757     if (im[i] >= rstart && im[i] < rend) {
4758       row      = im[i] - rstart;
4759       lastcol1 = -1;
4760       rp1      = aj + ai[row];
4761       ap1      = aa + ai[row];
4762       rmax1    = aimax[row];
4763       nrow1    = ailen[row];
4764       low1     = 0;
4765       high1    = nrow1;
4766       lastcol2 = -1;
4767       rp2      = bj + bi[row];
4768       ap2      = ba + bi[row];
4769       rmax2    = bimax[row];
4770       nrow2    = bilen[row];
4771       low2     = 0;
4772       high2    = nrow2;
4773 
4774       for (j=0; j<n; j++) {
4775         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4776         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4777         if (in[j] >= cstart && in[j] < cend){
4778           col = in[j] - cstart;
4779           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4780         } else if (in[j] < 0) continue;
4781 #if defined(PETSC_USE_DEBUG)
4782         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4783 #endif
4784         else {
4785           if (mat->was_assembled) {
4786             if (!aij->colmap) {
4787               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4788             }
4789 #if defined (PETSC_USE_CTABLE)
4790             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4791 	    col--;
4792 #else
4793             col = aij->colmap[in[j]] - 1;
4794 #endif
4795             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4796               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4797               col =  in[j];
4798               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4799               B = aij->B;
4800               b = (Mat_SeqAIJ*)B->data;
4801               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4802               rp2      = bj + bi[row];
4803               ap2      = ba + bi[row];
4804               rmax2    = bimax[row];
4805               nrow2    = bilen[row];
4806               low2     = 0;
4807               high2    = nrow2;
4808               bm       = aij->B->rmap.n;
4809               ba = b->a;
4810             }
4811           } else col = in[j];
4812           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4813         }
4814       }
4815     } else {
4816       if (!aij->donotstash) {
4817         if (roworiented) {
4818           if (ignorezeroentries && v[i*n] == 0.0) continue;
4819           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4820         } else {
4821           if (ignorezeroentries && v[i] == 0.0) continue;
4822           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4823         }
4824       }
4825     }
4826   }}
4827   PetscFunctionReturnVoid();
4828 }
4829 EXTERN_C_END
4830 
4831