xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 71f87433c2d5ca8a2055e7bac4e3ff8159d57028)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each processor
11 has an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
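/*
   Illustration only (not part of PETSc): a minimal sketch of how the colmap
   built above is consulted.  Both variants store "local index + 1", so a
   result of -1 after the decrement means the global column is not present in
   the off-diagonal block B.  The helper name is hypothetical and error
   checking is omitted for brevity.
*/
#if 0
static PetscInt SketchGlobalToLocalOffDiagCol(Mat_MPIAIJ *aij,PetscInt gcol)
{
  PetscInt lcol;
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&lcol);     /* yields 0 when the key is absent */
  lcol--;
#else
  lcol = aij->colmap[gcol] - 1;                 /* array was zeroed, so absent -> -1 */
#endif
  return lcol;                                  /* negative: column not in B */
}
#endif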
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
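/*
   The two macros above share one pattern: a short binary search narrows the
   window [low,high) in the sorted column array of the row, then a linear scan
   either updates the existing entry or finds the insertion point and shifts
   the tail up by one.  A minimal standalone sketch of that pattern, with
   hypothetical names and no reallocation handling:
*/
#if 0
static void SketchInsertIntoSortedRow(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,
                                      PetscInt col,PetscScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;
  while (high-low > 5) {                        /* binary search down to a small window */
    t = (low+high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {
    if (cols[i] > col) break;                   /* insertion point found */
    if (cols[i] == col) {                       /* column already present: update */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {               /* shift the tail up to make room */
    cols[ii+1] = cols[ii];
    vals[ii+1] = vals[ii];
  }
  cols[i] = col; vals[i] = value; (*nrow)++;
}
#endif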
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199             col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
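/*
   Caller-side sketch (illustration only, hypothetical values): rows owned by
   this process are routed above directly into the diagonal (A) or off-diagonal
   (B) block, while rows owned by other processes are placed in the stash and
   exchanged during assembly, so a caller may set any global row and simply
   assemble at the end.
*/
#if 0
  PetscInt    row     = 0;                      /* any global row, owned or not */
  PetscInt    cols[2] = {0,5};                  /* global column numbers        */
  PetscScalar vals[2] = {1.0,2.0};
  ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif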
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col--;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are either in INSERTMODE or ADDMODE */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
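/*
   Why MPI_BOR works above (sketch): InsertMode in this PETSc version is an enum
   with NOT_SET_VALUES=0, INSERT_VALUES=1 and ADD_VALUES=2, so a bitwise OR over
   all ranks yields 3 == (ADD_VALUES|INSERT_VALUES) exactly when some ranks
   inserted while others added; ranks that set nothing contribute 0 and do not
   disturb the result.  A self-contained illustration (hypothetical helper, not
   PETSc code):
*/
#if 0
static PetscErrorCode SketchCheckConsistentInsertMode(MPI_Comm comm,InsertMode mine)
{
  PetscErrorCode ierr;
  InsertMode     combined;

  PetscFunctionBegin;
  ierr = MPI_Allreduce(&mine,&combined,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (combined == (ADD_VALUES|INSERT_VALUES)) { /* 1|2 == 3: the ranks disagree */
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  PetscFunctionReturn(0);
}
#endif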
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col,other_disassembled;
319   PetscScalar    *val;
320   InsertMode     addv = mat->insertmode;
321 
322   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled, if so we must
346      also disassemble ourselves, in order that we may reassemble. */
347   /*
348      if nonzero structure of submatrix B cannot change then we know that
349      no processor disassembled, thus we can skip this step
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
359   }
360   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
361   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
364 
365   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
366   aij->rowvalues = 0;
367 
368   /* used by MatAXPY() */
369   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
371 
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal, we use separate
502      code so that MatSetValues() is not called for each diagonal entry,
503      which would allocate new memory and trigger many mallocs, slowing things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
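/*
   Sketch of the ownership lookup used above (illustration only, hypothetical
   helper): owners[] is the rmap.range array, rank p holds global rows
   owners[p] .. owners[p+1]-1, and the real loop resumes from the previous hit
   because callers usually pass mostly-sorted row lists.
*/
#if 0
static PetscInt SketchFindOwner(PetscInt idx,PetscMPIInt size,const PetscInt owners[])
{
  PetscInt p;
  for (p=0; p<size; p++) {
    if (idx >= owners[p] && idx < owners[p+1]) return p;
  }
  return -1;                                    /* index out of range */
}
#endif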
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added into yy until the VecScatterEnd() on the next line */
594     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
600     /* values actually were received in the Begin() but we need to call this nop */
601     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 EXTERN_C_BEGIN
607 #undef __FUNCT__
608 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610 {
611   MPI_Comm       comm;
612   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614   IS             Me,Notme;
615   PetscErrorCode ierr;
616   PetscInt       M,N,first,last,*notme,i;
617   PetscMPIInt    size;
618 
619   PetscFunctionBegin;
620 
621   /* Easy test: symmetric diagonal block */
622   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
624   if (!*f) PetscFunctionReturn(0);
625   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
627   if (size == 1) PetscFunctionReturn(0);
628 
629   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
631   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
632   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
633   for (i=0; i<first; i++) notme[i] = i;
634   for (i=last; i<M; i++) notme[i-last+first] = i;
635   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
636   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
637   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
638   Aoff = Aoffs[0];
639   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
640   Boff = Boffs[0];
641   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
642   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
644   ierr = ISDestroy(Me);CHKERRQ(ierr);
645   ierr = ISDestroy(Notme);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatDestroy_MPIAIJ"
705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706 {
707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711 #if defined(PETSC_USE_LOG)
712   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713 #endif
714   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
715   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
717 #if defined (PETSC_USE_CTABLE)
718   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
719 #else
720   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
721 #endif
722   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
723   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
724   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
725   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
726   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
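/*
   File layout produced above, as a sequential reader sketch (illustration
   only): header[4] = {MAT_FILE_COOKIE, M, N, global nz}, then the M row
   lengths, then all global column indices row by row (ascending within each
   row), then the matching values.  fd is assumed to be an already open binary
   file descriptor; names are hypothetical.
*/
#if 0
  PetscInt    header[4],*rowlens,*colidx;
  PetscScalar *vals;
  ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);                 /* cookie, M, N, nz  */
  ierr = PetscMalloc((header[1]+1)*sizeof(PetscInt),&rowlens);CHKERRQ(ierr);
  ierr = PetscBinaryRead(fd,rowlens,header[1],PETSC_INT);CHKERRQ(ierr);        /* M row lengths     */
  ierr = PetscMalloc((header[3]+1)*sizeof(PetscInt),&colidx);CHKERRQ(ierr);
  ierr = PetscBinaryRead(fd,colidx,header[3],PETSC_INT);CHKERRQ(ierr);         /* nz column indices */
  ierr = PetscMalloc((header[3]+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
  ierr = PetscBinaryRead(fd,vals,header[3],PETSC_SCALAR);CHKERRQ(ierr);        /* nz values         */
#endif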
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
903       PetscFunctionReturn(0);
904     } else if (format == PETSC_VIEWER_ASCII_INFO) {
905       PetscInt   inodecount,inodelimit,*inodes;
906       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
907       if (inodes) {
908         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
909       } else {
910         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
911       }
912       PetscFunctionReturn(0);
913     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
914       PetscFunctionReturn(0);
915     }
916   } else if (isbinary) {
917     if (size == 1) {
918       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
919       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
920     } else {
921       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
922     }
923     PetscFunctionReturn(0);
924   } else if (isdraw) {
925     PetscDraw  draw;
926     PetscTruth isnull;
927     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
928     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
929   }
930 
931   if (size == 1) {
932     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934   } else {
935     /* assemble the entire matrix onto first processor. */
936     Mat         A;
937     Mat_SeqAIJ  *Aloc;
938     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
939     PetscScalar *a;
940 
941     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
942     if (!rank) {
943       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
944     } else {
945       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
946     }
947     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
948     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
949     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
950     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
951 
952     /* copy over the A part */
953     Aloc = (Mat_SeqAIJ*)aij->A->data;
954     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955     row = mat->rmap.rstart;
956     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
957     for (i=0; i<m; i++) {
958       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
959       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
960     }
961     aj = Aloc->j;
962     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
963 
964     /* copy over the B part */
965     Aloc = (Mat_SeqAIJ*)aij->B->data;
966     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
967     row  = mat->rmap.rstart;
968     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
969     ct   = cols;
970     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
971     for (i=0; i<m; i++) {
972       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
973       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
974     }
975     ierr = PetscFree(ct);CHKERRQ(ierr);
976     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     /*
979        Every process has to make these calls to draw the matrix since the graphics waits are
980        synchronized across all processors that share the PetscDraw object
981     */
982     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
983     if (!rank) {
984       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
985       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
986     }
987     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
988     ierr = MatDestroy(A);CHKERRQ(ierr);
989   }
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatView_MPIAIJ"
995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
996 {
997   PetscErrorCode ierr;
998   PetscTruth     iascii,isdraw,issocket,isbinary;
999 
1000   PetscFunctionBegin;
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1005   if (iascii || isdraw || isbinary || issocket) {
1006     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1007   } else {
1008     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1009   }
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MatRelax_MPIAIJ"
1015 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1016 {
1017   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1018   PetscErrorCode ierr;
1019   Vec            bb1;
1020 
1021   PetscFunctionBegin;
1022   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1023 
1024   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1025 
1026   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1027     if (flag & SOR_ZERO_INITIAL_GUESS) {
1028       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1029       its--;
1030     }
1031 
1032     while (its--) {
1033       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1034       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1035 
1036       /* update rhs: bb1 = bb - B*x */
1037       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1038       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1039 
1040       /* local sweep */
1041       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1042       CHKERRQ(ierr);
1043     }
1044   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1045     if (flag & SOR_ZERO_INITIAL_GUESS) {
1046       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1047       its--;
1048     }
1049     while (its--) {
1050       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1051       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052 
1053       /* update rhs: bb1 = bb - B*x */
1054       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1055       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1056 
1057       /* local sweep */
1058       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1059       CHKERRQ(ierr);
1060     }
1061   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1062     if (flag & SOR_ZERO_INITIAL_GUESS) {
1063       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1064       its--;
1065     }
1066     while (its--) {
1067       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069 
1070       /* update rhs: bb1 = bb - B*x */
1071       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1072       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1073 
1074       /* local sweep */
1075       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1076       CHKERRQ(ierr);
1077     }
1078   } else {
1079     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1080   }
1081 
1082   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
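/*
   Sketch of one outer iteration of the local relaxation above (illustration
   only, hypothetical names): ghost values of x are gathered, the off-diagonal
   coupling is moved to the right-hand side, and the sequential SOR sweep is
   applied to the diagonal block only, i.e. x <- SOR(A_diag, bb - B*x_ghost).
*/
#if 0
  ierr = VecScatterBegin(xx,lvec,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,lvec,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
  ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);                  /* lvec = -x_ghost       */
  ierr = MatMultAdd(Boff,lvec,bb,bb1);CHKERRQ(ierr);         /* bb1  = bb - B*x_ghost */
  ierr = MatRelax(Adiag,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
#endif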
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatPermute_MPIAIJ"
1088 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1089 {
1090   MPI_Comm       comm,pcomm;
1091   PetscInt       first,local_size,nrows,*rows;
1092   int            ntids;
1093   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1098   /* make a collective version of 'rowp' */
1099   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1100   if (pcomm==comm) {
1101     crowp = rowp;
1102   } else {
1103     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1104     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1105     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1106     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1107   }
1108   /* collect the global row permutation and invert it */
1109   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1110   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1111   if (pcomm!=comm) {
1112     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1113   }
1114   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1115   /* get the local target indices */
1116   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1117   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1119   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1120   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1121   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1122   /* the column permutation is so much easier;
1123      make a local version of 'colp' and invert it */
1124   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1125   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1126   if (ntids==1) {
1127     lcolp = colp;
1128   } else {
1129     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1130     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1131     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1132   }
1133   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1134   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1135   if (ntids>1) {
1136     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1137     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1138   }
1139   /* now we just get the submatrix */
1140   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1141   /* clean up */
1142   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1143   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1149 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1150 {
1151   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1152   Mat            A = mat->A,B = mat->B;
1153   PetscErrorCode ierr;
1154   PetscReal      isend[5],irecv[5];
1155 
1156   PetscFunctionBegin;
1157   info->block_size     = 1.0;
1158   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1159   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1160   isend[3] = info->memory;  isend[4] = info->mallocs;
1161   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1162   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1163   isend[3] += info->memory;  isend[4] += info->mallocs;
1164   if (flag == MAT_LOCAL) {
1165     info->nz_used      = isend[0];
1166     info->nz_allocated = isend[1];
1167     info->nz_unneeded  = isend[2];
1168     info->memory       = isend[3];
1169     info->mallocs      = isend[4];
1170   } else if (flag == MAT_GLOBAL_MAX) {
1171     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1172     info->nz_used      = irecv[0];
1173     info->nz_allocated = irecv[1];
1174     info->nz_unneeded  = irecv[2];
1175     info->memory       = irecv[3];
1176     info->mallocs      = irecv[4];
1177   } else if (flag == MAT_GLOBAL_SUM) {
1178     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1179     info->nz_used      = irecv[0];
1180     info->nz_allocated = irecv[1];
1181     info->nz_unneeded  = irecv[2];
1182     info->memory       = irecv[3];
1183     info->mallocs      = irecv[4];
1184   }
1185   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1186   info->fill_ratio_needed = 0;
1187   info->factor_mallocs    = 0;
1188   info->rows_global       = (double)matin->rmap.N;
1189   info->columns_global    = (double)matin->cmap.N;
1190   info->rows_local        = (double)matin->rmap.n;
1191   info->columns_local     = (double)matin->cmap.N;
1192 
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatSetOption_MPIAIJ"
1198 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1199 {
1200   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   switch (op) {
1205   case MAT_NO_NEW_NONZERO_LOCATIONS:
1206   case MAT_YES_NEW_NONZERO_LOCATIONS:
1207   case MAT_COLUMNS_UNSORTED:
1208   case MAT_COLUMNS_SORTED:
1209   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1210   case MAT_KEEP_ZEROED_ROWS:
1211   case MAT_NEW_NONZERO_LOCATION_ERR:
1212   case MAT_USE_INODES:
1213   case MAT_DO_NOT_USE_INODES:
1214   case MAT_IGNORE_ZERO_ENTRIES:
1215     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1216     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1217     break;
1218   case MAT_ROW_ORIENTED:
1219     a->roworiented = PETSC_TRUE;
1220     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1221     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1222     break;
1223   case MAT_ROWS_SORTED:
1224   case MAT_ROWS_UNSORTED:
1225   case MAT_YES_NEW_DIAGONALS:
1226     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1227     break;
1228   case MAT_COLUMN_ORIENTED:
1229     a->roworiented = PETSC_FALSE;
1230     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1231     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1232     break;
1233   case MAT_IGNORE_OFF_PROC_ENTRIES:
1234     a->donotstash = PETSC_TRUE;
1235     break;
1236   case MAT_NO_NEW_DIAGONALS:
1237     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1238   case MAT_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1240     break;
1241   case MAT_STRUCTURALLY_SYMMETRIC:
1242   case MAT_HERMITIAN:
1243   case MAT_SYMMETRY_ETERNAL:
1244     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1245     break;
1246   case MAT_NOT_SYMMETRIC:
1247   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1248   case MAT_NOT_HERMITIAN:
1249   case MAT_NOT_SYMMETRY_ETERNAL:
1250     break;
1251   default:
1252     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1253   }
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatGetRow_MPIAIJ"
1259 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1260 {
1261   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1262   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1263   PetscErrorCode ierr;
1264   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1265   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1266   PetscInt       *cmap,*idx_p;
1267 
1268   PetscFunctionBegin;
1269   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1270   mat->getrowactive = PETSC_TRUE;
1271 
1272   if (!mat->rowvalues && (idx || v)) {
1273     /*
1274         allocate enough space to hold information from the longest row.
1275     */
1276     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1277     PetscInt     max = 1,tmp;
1278     for (i=0; i<matin->rmap.n; i++) {
1279       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1280       if (max < tmp) { max = tmp; }
1281     }
1282     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1283     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1284   }
1285 
1286   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1287   lrow = row - rstart;
1288 
1289   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1290   if (!v)   {pvA = 0; pvB = 0;}
1291   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1292   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1293   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1294   nztot = nzA + nzB;
1295 
1296   cmap  = mat->garray;
1297   if (v  || idx) {
1298     if (nztot) {
1299       /* Sort by increasing column numbers, assuming A and B already sorted */
1300       PetscInt imark = -1;
1301       if (v) {
1302         *v = v_p = mat->rowvalues;
1303         for (i=0; i<nzB; i++) {
1304           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1305           else break;
1306         }
1307         imark = i;
1308         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1309         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1310       }
1311       if (idx) {
1312         *idx = idx_p = mat->rowindices;
1313         if (imark > -1) {
1314           for (i=0; i<imark; i++) {
1315             idx_p[i] = cmap[cworkB[i]];
1316           }
1317         } else {
1318           for (i=0; i<nzB; i++) {
1319             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1320             else break;
1321           }
1322           imark = i;
1323         }
1324         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1325         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1326       }
1327     } else {
1328       if (idx) *idx = 0;
1329       if (v)   *v   = 0;
1330     }
1331   }
1332   *nz = nztot;
1333   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1334   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
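/*
   The merge above relies on both row pieces being sorted: the off-diagonal row
   (mapped through garray/cmap) first contributes its columns lying left of the
   diagonal block, then the diagonal-block columns (offset by cstart), then the
   remaining off-diagonal columns.  Compact sketch with hypothetical names:
*/
#if 0
  cnt = 0;
  for (k=0; k<nzB && cmap[cworkB[k]] < cstart; k++) merged[cnt++] = cmap[cworkB[k]];
  for (j=0; j<nzA; j++)                             merged[cnt++] = cstart + cworkA[j];
  for (;  k<nzB; k++)                               merged[cnt++] = cmap[cworkB[k]];
#endif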
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1340 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1341 {
1342   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1343 
1344   PetscFunctionBegin;
1345   if (!aij->getrowactive) {
1346     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1347   }
1348   aij->getrowactive = PETSC_FALSE;
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatNorm_MPIAIJ"
1354 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1355 {
1356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1357   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1358   PetscErrorCode ierr;
1359   PetscInt       i,j,cstart = mat->cmap.rstart;
1360   PetscReal      sum = 0.0;
1361   PetscScalar    *v;
1362 
1363   PetscFunctionBegin;
1364   if (aij->size == 1) {
1365     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1366   } else {
1367     if (type == NORM_FROBENIUS) {
1368       v = amat->a;
1369       for (i=0; i<amat->nz; i++) {
1370 #if defined(PETSC_USE_COMPLEX)
1371         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1372 #else
1373         sum += (*v)*(*v); v++;
1374 #endif
1375       }
1376       v = bmat->a;
1377       for (i=0; i<bmat->nz; i++) {
1378 #if defined(PETSC_USE_COMPLEX)
1379         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1380 #else
1381         sum += (*v)*(*v); v++;
1382 #endif
1383       }
1384       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1385       *norm = sqrt(*norm);
1386     } else if (type == NORM_1) { /* max column norm */
1387       PetscReal *tmp,*tmp2;
1388       PetscInt    *jj,*garray = aij->garray;
1389       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1390       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1391       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1392       *norm = 0.0;
1393       v = amat->a; jj = amat->j;
1394       for (j=0; j<amat->nz; j++) {
1395         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1396       }
1397       v = bmat->a; jj = bmat->j;
1398       for (j=0; j<bmat->nz; j++) {
1399         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1400       }
1401       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1402       for (j=0; j<mat->cmap.N; j++) {
1403         if (tmp2[j] > *norm) *norm = tmp2[j];
1404       }
1405       ierr = PetscFree(tmp);CHKERRQ(ierr);
1406       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1407     } else if (type == NORM_INFINITY) { /* max row norm */
1408       PetscReal ntemp = 0.0;
1409       for (j=0; j<aij->A->rmap.n; j++) {
1410         v = amat->a + amat->i[j];
1411         sum = 0.0;
1412         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1413           sum += PetscAbsScalar(*v); v++;
1414         }
1415         v = bmat->a + bmat->i[j];
1416         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1417           sum += PetscAbsScalar(*v); v++;
1418         }
1419         if (sum > ntemp) ntemp = sum;
1420       }
1421       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1422     } else {
1423       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1424     }
1425   }
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatTranspose_MPIAIJ"
1431 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1432 {
1433   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1434   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1435   PetscErrorCode ierr;
1436   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1437   PetscInt       cstart=A->cmap.rstart,ncol;
1438   Mat            B;
1439   PetscScalar    *array;
1440 
1441   PetscFunctionBegin;
1442   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"In-place transpose is only supported for square matrices");
1443 
1444   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
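       /* d_nnz[k] counts the nonzeros in local column k of the diagonal block; after transposing,
          that is the number of diagonal-block entries in local row k of the result. */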
1445   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1446   ai = Aloc->i; aj = Aloc->j;
1447   bi = Bloc->i; bj = Bloc->j;
1448   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1449   cols = d_nnz + na + 1; /* work space to be used by B part */
1450   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1451   for (i=0; i<ai[ma]; i++){
1452     d_nnz[aj[i]] ++;
1453     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1454   }
1455 
1456   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1457   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1458   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1459   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1460 
1461   /* copy over the A part */
1462   array = Aloc->a;
1463   row = A->rmap.rstart;
1464   for (i=0; i<ma; i++) {
1465     ncol = ai[i+1]-ai[i];
1466     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1467     row++; array += ncol; aj += ncol;
1468   }
1469   aj = Aloc->j;
1470   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
1471 
1472   /* copy over the B part */
1473   array = Bloc->a;
1474   row = A->rmap.rstart;
1475   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1476   for (i=0; i<mb; i++) {
1477     ncol = bi[i+1]-bi[i];
1478     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1479     row++; array += ncol; cols += ncol;
1480   }
1481   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1482   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1483   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1484   if (matout) {
1485     *matout = B;
1486   } else {
1487     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1488   }
1489   PetscFunctionReturn(0);
1490 }
1491 
1492 #undef __FUNCT__
1493 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1494 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1495 {
1496   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1497   Mat            a = aij->A,b = aij->B;
1498   PetscErrorCode ierr;
1499   PetscInt       s1,s2,s3;
1500 
1501   PetscFunctionBegin;
1502   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1503   if (rr) {
1504     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1505     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1506     /* Overlap communication with computation. */
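         /* The scatter of rr into aij->lvec proceeds while the left scaling of B and the scaling
            of the diagonal block A are performed below; VecScatterEnd() is called only when the
            off-diagonal block actually needs the scattered values. */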
1507     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1508   }
1509   if (ll) {
1510     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1511     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1512     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1513   }
1514   /* scale  the diagonal block */
1515   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1516 
1517   if (rr) {
1518     /* Do a scatter end and then right scale the off-diagonal block */
1519     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1520     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1521   }
1522 
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1528 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1529 {
1530   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1531   PetscErrorCode ierr;
1532 
1533   PetscFunctionBegin;
1534   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1535   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1540 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1541 {
1542   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1543   PetscErrorCode ierr;
1544 
1545   PetscFunctionBegin;
1546   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatEqual_MPIAIJ"
1552 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1553 {
1554   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1555   Mat            a,b,c,d;
1556   PetscTruth     flg;
1557   PetscErrorCode ierr;
1558 
1559   PetscFunctionBegin;
1560   a = matA->A; b = matA->B;
1561   c = matB->A; d = matB->B;
1562 
1563   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1564   if (flg) {
1565     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1566   }
1567   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatCopy_MPIAIJ"
1573 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1574 {
1575   PetscErrorCode ierr;
1576   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1577   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1578 
1579   PetscFunctionBegin;
1580   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1581   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1582     /* Because of the column compression in the off-processor part of the matrix a->B,
1583        the number of columns in a->B and b->B may be different; hence we cannot call
1584        MatCopy() directly on the two parts. If need be, a copy more efficient than
1585        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1586        then copying the submatrices. */
1587     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1588   } else {
1589     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1590     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1591   }
1592   PetscFunctionReturn(0);
1593 }
1594 
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1597 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1598 {
1599   PetscErrorCode ierr;
1600 
1601   PetscFunctionBegin;
1602   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #include "petscblaslapack.h"
1607 #undef __FUNCT__
1608 #define __FUNCT__ "MatAXPY_MPIAIJ"
1609 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1610 {
1611   PetscErrorCode ierr;
1612   PetscInt       i;
1613   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1614   PetscBLASInt   bnz,one=1;
1615   Mat_SeqAIJ     *x,*y;
1616 
1617   PetscFunctionBegin;
1618   if (str == SAME_NONZERO_PATTERN) {
1619     PetscScalar alpha = a;
1620     x = (Mat_SeqAIJ *)xx->A->data;
1621     y = (Mat_SeqAIJ *)yy->A->data;
1622     bnz = (PetscBLASInt)x->nz;
1623     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1624     x = (Mat_SeqAIJ *)xx->B->data;
1625     y = (Mat_SeqAIJ *)yy->B->data;
1626     bnz = (PetscBLASInt)x->nz;
1627     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1628   } else if (str == SUBSET_NONZERO_PATTERN) {
1629     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1630 
1631     x = (Mat_SeqAIJ *)xx->B->data;
1632     y = (Mat_SeqAIJ *)yy->B->data;
1633     if (y->xtoy && y->XtoY != xx->B) {
1634       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1635       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1636     }
1637     if (!y->xtoy) { /* get xtoy */
1638       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1639       y->XtoY = xx->B;
1640       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1641     }
1642     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1643   } else {
1644     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1645   }
1646   PetscFunctionReturn(0);
1647 }
1648 
1649 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatConjugate_MPIAIJ"
1653 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1654 {
1655 #if defined(PETSC_USE_COMPLEX)
1656   PetscErrorCode ierr;
1657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1658 
1659   PetscFunctionBegin;
1660   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1661   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1662 #else
1663   PetscFunctionBegin;
1664 #endif
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatRealPart_MPIAIJ"
1670 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1671 {
1672   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1673   PetscErrorCode ierr;
1674 
1675   PetscFunctionBegin;
1676   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1677   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1683 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1684 {
1685   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1686   PetscErrorCode ierr;
1687 
1688   PetscFunctionBegin;
1689   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1690   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 #ifdef PETSC_HAVE_PBGL
1695 
1696 #include <boost/parallel/mpi/bsp_process_group.hpp>
1697 #include <boost/graph/distributed/ilu_default_graph.hpp>
1698 #include <boost/graph/distributed/ilu_0_block.hpp>
1699 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1700 #include <boost/graph/distributed/petsc/interface.hpp>
1701 #include <boost/multi_array.hpp>
1702 #include <boost/parallel/distributed_property_map.hpp>
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1706 /*
1707   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1708 */
1709 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1710 {
1711   namespace petsc = boost::distributed::petsc;
1712 
1713   namespace graph_dist = boost::graph::distributed;
1714   using boost::graph::distributed::ilu_default::process_group_type;
1715   using boost::graph::ilu_permuted;
1716 
1717   PetscTruth      row_identity, col_identity;
1718   PetscContainer  c;
1719   PetscInt        m, n, M, N;
1720   PetscErrorCode  ierr;
1721 
1722   PetscFunctionBegin;
1723   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1724   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1725   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1726   if (!row_identity || !col_identity) {
1727     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1728   }
1729 
1730   process_group_type pg;
1731   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1732   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1733   lgraph_type&   level_graph = *lgraph_p;
1734   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1735 
1736   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1737   ilu_permuted(level_graph);
1738 
1739   /* put together the new matrix */
1740   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1741   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1742   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1743   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1744   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1745   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1746   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1747   (*fact)->factor = FACTOR_LU;
1748 
1749   ierr = PetscContainerCreate(A->comm, &c);CHKERRQ(ierr);
1750   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1751   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1757 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1758 {
1759   PetscFunctionBegin;
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatSolve_MPIAIJ"
1765 /*
1766   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1767 */
1768 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1769 {
1770   namespace graph_dist = boost::graph::distributed;
1771 
1772   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1773   lgraph_type*   lgraph_p;
1774   PetscContainer c;
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1779   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1780   ierr = VecCopy(b, x); CHKERRQ(ierr);
1781 
1782   PetscScalar* array_x;
1783   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1784   PetscInt sx;
1785   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1786 
1787   PetscScalar* array_b;
1788   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1789   PetscInt sb;
1790   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1791 
1792   lgraph_type&   level_graph = *lgraph_p;
1793   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1794 
1795   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1796   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1797                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1798 
1799   typedef boost::iterator_property_map<array_ref_type::iterator,
1800                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1801   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1802                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1803 
1804   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1805 
1806   PetscFunctionReturn(0);
1807 }
1808 #endif
1809 
1810 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1811   PetscInt       nzlocal,nsends,nrecvs;
1812   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
1813   PetscScalar    *sbuf_a,**rbuf_a;
1814   PetscErrorCode (*MatDestroy)(Mat);
1815 } Mat_Redundant;
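     /* The struct above is attached to the redundant matrix C through a PetscContainer named
        "Mat_Redundant"; MatDestroy_MatRedundant() below restores C's original destroy routine, and
        PetscContainerDestroy_MatRedundant() frees the send/receive buffers when C is destroyed. */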
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1819 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1820 {
1821   PetscErrorCode       ierr;
1822   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1823   PetscInt             i;
1824 
1825   PetscFunctionBegin;
1826   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1827   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1828   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1829   for (i=0; i<redund->nrecvs; i++){
1830     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1831     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1832   }
1833   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1834   ierr = PetscFree(redund);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatDestroy_MatRedundant"
1840 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1841 {
1842   PetscErrorCode  ierr;
1843   PetscContainer  container;
1844   Mat_Redundant   *redund=PETSC_NULL;
1845 
1846   PetscFunctionBegin;
1847   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1848   if (container) {
1849     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1850   } else {
1851     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1852   }
1853   A->ops->destroy = redund->MatDestroy;
1854   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1855   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1856   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1862 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1863 {
1864   PetscMPIInt    rank,size;
1865   MPI_Comm       comm=mat->comm;
1866   PetscErrorCode ierr;
1867   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1868   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1869   PetscInt       *rowrange=mat->rmap.range;
1870   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1871   Mat            A=aij->A,B=aij->B,C=*matredundant;
1872   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1873   PetscScalar    *sbuf_a;
1874   PetscInt       nzlocal=a->nz+b->nz;
1875   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1876   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1877   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1878   PetscScalar    *vals,*aworkA,*aworkB;
1879   PetscMPIInt    tag1,tag2,tag3,imdex;
1880   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1881                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1882   MPI_Status     recv_status,*send_status;
1883   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1884   PetscInt       **rbuf_j=PETSC_NULL;
1885   PetscScalar    **rbuf_a=PETSC_NULL;
1886   Mat_Redundant  *redund=PETSC_NULL;
1887   PetscContainer container;
1888 
1889   PetscFunctionBegin;
1890   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1891   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1892 
1893   if (reuse == MAT_REUSE_MATRIX) {
1894     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1895     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1896     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1897     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1898     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1899     if (container) {
1900       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1901     } else {
1902       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1903     }
1904     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1905 
1906     nsends    = redund->nsends;
1907     nrecvs    = redund->nrecvs;
1908     send_rank = redund->send_rank; recv_rank = send_rank + size;
1909     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1910     sbuf_j    = redund->sbuf_j;
1911     sbuf_a    = redund->sbuf_a;
1912     rbuf_j    = redund->rbuf_j;
1913     rbuf_a    = redund->rbuf_a;
1914   }
1915 
1916   if (reuse == MAT_INITIAL_MATRIX){
1917     PetscMPIInt  subrank,subsize;
1918     PetscInt     nleftover,np_subcomm;
1919     /* get the destination processors' id send_rank, nsends and nrecvs */
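         /* The pattern below assumes the subcommunicators were formed so that global rank i has
            subrank i/nsubcomm in its subcommunicator; each process exchanges its local rows with
            the processes sharing its subrank in the other subcommunicators, with special handling
            of the leftover processes when size is not divisible by nsubcomm. */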
1920     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1921     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1922     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
1923     recv_rank = send_rank + size;
1924     np_subcomm = size/nsubcomm;
1925     nleftover  = size - nsubcomm*np_subcomm;
1926     nsends = 0; nrecvs = 0;
1927     for (i=0; i<size; i++){ /* i=rank*/
1928       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1929         send_rank[nsends] = i; nsends++;
1930         recv_rank[nrecvs++] = i;
1931       }
1932     }
1933     if (rank >= size - nleftover){/* this proc is a leftover processor */
1934       i = size-nleftover-1;
1935       j = 0;
1936       while (j < nsubcomm - nleftover){
1937         send_rank[nsends++] = i;
1938         i--; j++;
1939       }
1940     }
1941 
1942     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1943       for (i=0; i<nleftover; i++){
1944         recv_rank[nrecvs++] = size-nleftover+i;
1945       }
1946     }
1947 
1948     /* allocate sbuf_j, sbuf_a */
1949     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1950     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1951     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1952   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1953 
1954   /* copy mat's local entries into the buffers */
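       /* sbuf_j holds the row pointers (rend-rstart+1 of them) followed by the global column
          indices of every local row; sbuf_a holds the matching values.  Each row is written as
          the B entries with global column < cstart, then the A (diagonal block) entries, then the
          remaining B entries, so the columns of a row appear in increasing global order. */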
1955   if (reuse == MAT_INITIAL_MATRIX){
1956     rownz_max = 0;
1957     rptr = sbuf_j;
1958     cols = sbuf_j + rend-rstart + 1;
1959     vals = sbuf_a;
1960     rptr[0] = 0;
1961     for (i=0; i<rend-rstart; i++){
1962       row = i + rstart;
1963       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1964       ncols  = nzA + nzB;
1965       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1966       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1967       /* load the column indices for this row into cols */
1968       lwrite = 0;
1969       for (l=0; l<nzB; l++) {
1970         if ((ctmp = bmap[cworkB[l]]) < cstart){
1971           vals[lwrite]   = aworkB[l];
1972           cols[lwrite++] = ctmp;
1973         }
1974       }
1975       for (l=0; l<nzA; l++){
1976         vals[lwrite]   = aworkA[l];
1977         cols[lwrite++] = cstart + cworkA[l];
1978       }
1979       for (l=0; l<nzB; l++) {
1980         if ((ctmp = bmap[cworkB[l]]) >= cend){
1981           vals[lwrite]   = aworkB[l];
1982           cols[lwrite++] = ctmp;
1983         }
1984       }
1985       vals += ncols;
1986       cols += ncols;
1987       rptr[i+1] = rptr[i] + ncols;
1988       if (rownz_max < ncols) rownz_max = ncols;
1989     }
1990     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1991   } else { /* only copy matrix values into sbuf_a */
1992     rptr = sbuf_j;
1993     vals = sbuf_a;
1994     rptr[0] = 0;
1995     for (i=0; i<rend-rstart; i++){
1996       row = i + rstart;
1997       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1998       ncols  = nzA + nzB;
1999       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2000       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2001       lwrite = 0;
2002       for (l=0; l<nzB; l++) {
2003         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2004       }
2005       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2006       for (l=0; l<nzB; l++) {
2007         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2008       }
2009       vals += ncols;
2010       rptr[i+1] = rptr[i] + ncols;
2011     }
2012   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2013 
2014   /* send nzlocal to others, and recv others' nzlocal */
2015   /*--------------------------------------------------*/
2016   if (reuse == MAT_INITIAL_MATRIX){
2017     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2018     s_waits2 = s_waits3 + nsends;
2019     s_waits1 = s_waits2 + nsends;
2020     r_waits1 = s_waits1 + nsends;
2021     r_waits2 = r_waits1 + nrecvs;
2022     r_waits3 = r_waits2 + nrecvs;
2023   } else {
2024     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2025     r_waits3 = s_waits3 + nsends;
2026   }
2027 
2028   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2029   if (reuse == MAT_INITIAL_MATRIX){
2030     /* get new tags to keep the communication clean */
2031     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2032     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2033     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2034     rbuf_nz = sbuf_nz + nsends;
2035 
2036     /* post receives of other's nzlocal */
2037     for (i=0; i<nrecvs; i++){
2038       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2039     }
2040     /* send nzlocal to others */
2041     for (i=0; i<nsends; i++){
2042       sbuf_nz[i] = nzlocal;
2043       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2044     }
2045     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2046     count = nrecvs;
2047     while (count) {
2048       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2049       recv_rank[imdex] = recv_status.MPI_SOURCE;
2050       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2051       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2052 
2053       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2054       rbuf_nz[imdex] += i + 2;
2055       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2056       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2057       count--;
2058     }
2059     /* wait on sends of nzlocal */
2060     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2061     /* send mat->i,j to others, and recv from others */
2062     /*------------------------------------------------*/
2063     for (i=0; i<nsends; i++){
2064       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2065       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2066     }
2067     /* wait on receives of mat->i,j */
2068     /*------------------------------*/
2069     count = nrecvs;
2070     while (count) {
2071       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2072       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2073       count--;
2074     }
2075     /* wait on sends of mat->i,j */
2076     /*---------------------------*/
2077     if (nsends) {
2078       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2079     }
2080   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2081 
2082   /* post receives, send and receive mat->a */
2083   /*----------------------------------------*/
2084   for (imdex=0; imdex<nrecvs; imdex++) {
2085     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2086   }
2087   for (i=0; i<nsends; i++){
2088     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2089   }
2090   count = nrecvs;
2091   while (count) {
2092     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2093     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2094     count--;
2095   }
2096   if (nsends) {
2097     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2098   }
2099 
2100   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2101 
2102   /* create redundant matrix */
2103   /*-------------------------*/
2104   if (reuse == MAT_INITIAL_MATRIX){
2105     /* compute rownz_max for preallocation */
2106     for (imdex=0; imdex<nrecvs; imdex++){
2107       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2108       rptr = rbuf_j[imdex];
2109       for (i=0; i<j; i++){
2110         ncols = rptr[i+1] - rptr[i];
2111         if (rownz_max < ncols) rownz_max = ncols;
2112       }
2113     }
2114 
2115     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2116     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2117     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2118     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2119     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2120   } else {
2121     C = *matredundant;
2122   }
2123 
2124   /* insert local matrix entries */
2125   rptr = sbuf_j;
2126   cols = sbuf_j + rend-rstart + 1;
2127   vals = sbuf_a;
2128   for (i=0; i<rend-rstart; i++){
2129     row   = i + rstart;
2130     ncols = rptr[i+1] - rptr[i];
2131     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2132     vals += ncols;
2133     cols += ncols;
2134   }
2135   /* insert received matrix entries */
2136   for (imdex=0; imdex<nrecvs; imdex++){
2137     rstart = rowrange[recv_rank[imdex]];
2138     rend   = rowrange[recv_rank[imdex]+1];
2139     rptr = rbuf_j[imdex];
2140     cols = rbuf_j[imdex] + rend-rstart + 1;
2141     vals = rbuf_a[imdex];
2142     for (i=0; i<rend-rstart; i++){
2143       row   = i + rstart;
2144       ncols = rptr[i+1] - rptr[i];
2145       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2146       vals += ncols;
2147       cols += ncols;
2148     }
2149   }
2150   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2151   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2152   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2153   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ4(PETSC_ERR_ARG_INCOMP,"redundant mat size %D x %D != input mat size %D x %D",M,N,mat->rmap.N,mat->cmap.N);
2154   if (reuse == MAT_INITIAL_MATRIX){
2155     PetscContainer container;
2156     *matredundant = C;
2157     /* create a supporting struct and attach it to C for reuse */
2158     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
2159     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2160     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2161     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2162     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2163 
2164     redund->nzlocal = nzlocal;
2165     redund->nsends  = nsends;
2166     redund->nrecvs  = nrecvs;
2167     redund->send_rank = send_rank;
2168     redund->sbuf_nz = sbuf_nz;
2169     redund->sbuf_j  = sbuf_j;
2170     redund->sbuf_a  = sbuf_a;
2171     redund->rbuf_j  = rbuf_j;
2172     redund->rbuf_a  = rbuf_a;
2173 
2174     redund->MatDestroy = C->ops->destroy;
2175     C->ops->destroy    = MatDestroy_MatRedundant;
2176   }
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 /* -------------------------------------------------------------------*/
2181 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2182        MatGetRow_MPIAIJ,
2183        MatRestoreRow_MPIAIJ,
2184        MatMult_MPIAIJ,
2185 /* 4*/ MatMultAdd_MPIAIJ,
2186        MatMultTranspose_MPIAIJ,
2187        MatMultTransposeAdd_MPIAIJ,
2188 #ifdef PETSC_HAVE_PBGL
2189        MatSolve_MPIAIJ,
2190 #else
2191        0,
2192 #endif
2193        0,
2194        0,
2195 /*10*/ 0,
2196        0,
2197        0,
2198        MatRelax_MPIAIJ,
2199        MatTranspose_MPIAIJ,
2200 /*15*/ MatGetInfo_MPIAIJ,
2201        MatEqual_MPIAIJ,
2202        MatGetDiagonal_MPIAIJ,
2203        MatDiagonalScale_MPIAIJ,
2204        MatNorm_MPIAIJ,
2205 /*20*/ MatAssemblyBegin_MPIAIJ,
2206        MatAssemblyEnd_MPIAIJ,
2207        0,
2208        MatSetOption_MPIAIJ,
2209        MatZeroEntries_MPIAIJ,
2210 /*25*/ MatZeroRows_MPIAIJ,
2211        0,
2212 #ifdef PETSC_HAVE_PBGL
2213        MatLUFactorNumeric_MPIAIJ,
2214 #else
2215        0,
2216 #endif
2217        0,
2218        0,
2219 /*30*/ MatSetUpPreallocation_MPIAIJ,
2220 #ifdef PETSC_HAVE_PBGL
2221        MatILUFactorSymbolic_MPIAIJ,
2222 #else
2223        0,
2224 #endif
2225        0,
2226        0,
2227        0,
2228 /*35*/ MatDuplicate_MPIAIJ,
2229        0,
2230        0,
2231        0,
2232        0,
2233 /*40*/ MatAXPY_MPIAIJ,
2234        MatGetSubMatrices_MPIAIJ,
2235        MatIncreaseOverlap_MPIAIJ,
2236        MatGetValues_MPIAIJ,
2237        MatCopy_MPIAIJ,
2238 /*45*/ 0,
2239        MatScale_MPIAIJ,
2240        0,
2241        0,
2242        0,
2243 /*50*/ MatSetBlockSize_MPIAIJ,
2244        0,
2245        0,
2246        0,
2247        0,
2248 /*55*/ MatFDColoringCreate_MPIAIJ,
2249        0,
2250        MatSetUnfactored_MPIAIJ,
2251        MatPermute_MPIAIJ,
2252        0,
2253 /*60*/ MatGetSubMatrix_MPIAIJ,
2254        MatDestroy_MPIAIJ,
2255        MatView_MPIAIJ,
2256        0,
2257        0,
2258 /*65*/ 0,
2259        0,
2260        0,
2261        0,
2262        0,
2263 /*70*/ 0,
2264        0,
2265        MatSetColoring_MPIAIJ,
2266 #if defined(PETSC_HAVE_ADIC)
2267        MatSetValuesAdic_MPIAIJ,
2268 #else
2269        0,
2270 #endif
2271        MatSetValuesAdifor_MPIAIJ,
2272 /*75*/ 0,
2273        0,
2274        0,
2275        0,
2276        0,
2277 /*80*/ 0,
2278        0,
2279        0,
2280        0,
2281 /*84*/ MatLoad_MPIAIJ,
2282        0,
2283        0,
2284        0,
2285        0,
2286        0,
2287 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2288        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2289        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2290        MatPtAP_Basic,
2291        MatPtAPSymbolic_MPIAIJ,
2292 /*95*/ MatPtAPNumeric_MPIAIJ,
2293        0,
2294        0,
2295        0,
2296        0,
2297 /*100*/0,
2298        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2299        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2300        MatConjugate_MPIAIJ,
2301        0,
2302 /*105*/MatSetValuesRow_MPIAIJ,
2303        MatRealPart_MPIAIJ,
2304        MatImaginaryPart_MPIAIJ,
2305        0,
2306        0,
2307 /*110*/0,
2308        MatGetRedundantMatrix_MPIAIJ};
2309 
2310 /* ----------------------------------------------------------------------------------------*/
2311 
2312 EXTERN_C_BEGIN
2313 #undef __FUNCT__
2314 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2315 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2316 {
2317   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2318   PetscErrorCode ierr;
2319 
2320   PetscFunctionBegin;
2321   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2322   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2323   PetscFunctionReturn(0);
2324 }
2325 EXTERN_C_END
2326 
2327 EXTERN_C_BEGIN
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2330 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2331 {
2332   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2333   PetscErrorCode ierr;
2334 
2335   PetscFunctionBegin;
2336   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2337   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 EXTERN_C_END
2341 
2342 #include "petscpc.h"
2343 EXTERN_C_BEGIN
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2346 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2347 {
2348   Mat_MPIAIJ     *b;
2349   PetscErrorCode ierr;
2350   PetscInt       i;
2351 
2352   PetscFunctionBegin;
2353   B->preallocated = PETSC_TRUE;
2354   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2355   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2356   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2357   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2358 
2359   B->rmap.bs = B->cmap.bs = 1;
2360   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2361   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2362   if (d_nnz) {
2363     for (i=0; i<B->rmap.n; i++) {
2364       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2365     }
2366   }
2367   if (o_nnz) {
2368     for (i=0; i<B->rmap.n; i++) {
2369       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2370     }
2371   }
2372   b = (Mat_MPIAIJ*)B->data;
2373 
2374   /* Explicitly create 2 MATSEQAIJ matrices. */
2375   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2376   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2377   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2378   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2379   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2380   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2381   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2382   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2383 
2384   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2385   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2386 
2387   PetscFunctionReturn(0);
2388 }
2389 EXTERN_C_END
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2393 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2394 {
2395   Mat            mat;
2396   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2397   PetscErrorCode ierr;
2398 
2399   PetscFunctionBegin;
2400   *newmat       = 0;
2401   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2402   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2403   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2404   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2405   a    = (Mat_MPIAIJ*)mat->data;
2406 
2407   mat->factor       = matin->factor;
2408   mat->rmap.bs      = matin->rmap.bs;
2409   mat->assembled    = PETSC_TRUE;
2410   mat->insertmode   = NOT_SET_VALUES;
2411   mat->preallocated = PETSC_TRUE;
2412 
2413   a->size           = oldmat->size;
2414   a->rank           = oldmat->rank;
2415   a->donotstash     = oldmat->donotstash;
2416   a->roworiented    = oldmat->roworiented;
2417   a->rowindices     = 0;
2418   a->rowvalues      = 0;
2419   a->getrowactive   = PETSC_FALSE;
2420 
2421   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2422   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2423 
2424   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2425   if (oldmat->colmap) {
2426 #if defined (PETSC_USE_CTABLE)
2427     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2428 #else
2429     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2430     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2431     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2432 #endif
2433   } else a->colmap = 0;
2434   if (oldmat->garray) {
2435     PetscInt len;
2436     len  = oldmat->B->cmap.n;
2437     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2438     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2439     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2440   } else a->garray = 0;
2441 
2442   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2443   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2444   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2445   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2446   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2447   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2448   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2449   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2450   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2451   *newmat = mat;
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 #include "petscsys.h"
2456 
2457 #undef __FUNCT__
2458 #define __FUNCT__ "MatLoad_MPIAIJ"
2459 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2460 {
2461   Mat            A;
2462   PetscScalar    *vals,*svals;
2463   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2464   MPI_Status     status;
2465   PetscErrorCode ierr;
2466   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2467   PetscInt       i,nz,j,rstart,rend,mmax;
2468   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2469   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2470   PetscInt       cend,cstart,n,*rowners;
2471   int            fd;
2472 
2473   PetscFunctionBegin;
2474   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2475   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2476   if (!rank) {
2477     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2478     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2479     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2480   }
2481 
2482   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2483   M = header[1]; N = header[2];
2484   /* determine ownership of all rows */
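       /* rows are split as evenly as possible: every process gets M/size rows and the first
          M%size processes get one extra row */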
2485   m    = M/size + ((M % size) > rank);
2486   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2487   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2488 
2489   /* First process needs enough room for process with most rows */
2490   if (!rank) {
2491     mmax       = rowners[1];
2492     for (i=2; i<size; i++) {
2493       mmax = PetscMax(mmax,rowners[i]);
2494     }
2495   } else mmax = m;
2496 
2497   rowners[0] = 0;
2498   for (i=2; i<=size; i++) {
2499     rowners[i] += rowners[i-1];
2500   }
2501   rstart = rowners[rank];
2502   rend   = rowners[rank+1];
2503 
2504   /* distribute row lengths to all processors */
2505   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2506   if (!rank) {
2507     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2508     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2509     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2510     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2511     for (j=0; j<m; j++) {
2512       procsnz[0] += ourlens[j];
2513     }
2514     for (i=1; i<size; i++) {
2515       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2516       /* calculate the number of nonzeros on each processor */
2517       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2518         procsnz[i] += rowlengths[j];
2519       }
2520       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2521     }
2522     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2523   } else {
2524     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2525   }
2526 
2527   if (!rank) {
2528     /* determine max buffer needed and allocate it */
2529     maxnz = 0;
2530     for (i=0; i<size; i++) {
2531       maxnz = PetscMax(maxnz,procsnz[i]);
2532     }
2533     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2534 
2535     /* read in my part of the matrix column indices  */
2536     nz   = procsnz[0];
2537     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2538     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2539 
2540     /* read in everyone else's and ship off */
2541     for (i=1; i<size; i++) {
2542       nz   = procsnz[i];
2543       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2544       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2545     }
2546     ierr = PetscFree(cols);CHKERRQ(ierr);
2547   } else {
2548     /* determine buffer space needed for message */
2549     nz = 0;
2550     for (i=0; i<m; i++) {
2551       nz += ourlens[i];
2552     }
2553     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2554 
2555     /* receive message of column indices*/
2556     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2557     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2558     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"number of column indices received does not match the row lengths");
2559   }
2560 
2561   /* determine column ownership if matrix is not square */
2562   if (N != M) {
2563     n      = N/size + ((N % size) > rank);
2564     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2565     cstart = cend - n;
2566   } else {
2567     cstart = rstart;
2568     cend   = rend;
2569     n      = cend - cstart;
2570   }
2571 
2572   /* loop over local rows, determining number of off diagonal entries */
2573   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2574   jj = 0;
2575   for (i=0; i<m; i++) {
2576     for (j=0; j<ourlens[i]; j++) {
2577       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2578       jj++;
2579     }
2580   }
2581 
2582   /* create our matrix */
2583   for (i=0; i<m; i++) {
2584     ourlens[i] -= offlens[i];
2585   }
2586   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2587   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2588   ierr = MatSetType(A,type);CHKERRQ(ierr);
2589   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2590 
2591   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2592   for (i=0; i<m; i++) {
2593     ourlens[i] += offlens[i];
2594   }
2595 
2596   if (!rank) {
2597     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2598 
2599     /* read in my part of the matrix numerical values  */
2600     nz   = procsnz[0];
2601     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2602 
2603     /* insert into matrix */
2604     jj      = rstart;
2605     smycols = mycols;
2606     svals   = vals;
2607     for (i=0; i<m; i++) {
2608       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2609       smycols += ourlens[i];
2610       svals   += ourlens[i];
2611       jj++;
2612     }
2613 
2614     /* read in other processors and ship out */
2615     for (i=1; i<size; i++) {
2616       nz   = procsnz[i];
2617       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2618       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2619     }
2620     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2621   } else {
2622     /* receive numeric values */
2623     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2624 
2625     /* receive message of values*/
2626     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2627     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2628     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"number of values received does not match the expected nonzero count");
2629 
2630     /* insert into matrix */
2631     jj      = rstart;
2632     smycols = mycols;
2633     svals   = vals;
2634     for (i=0; i<m; i++) {
2635       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2636       smycols += ourlens[i];
2637       svals   += ourlens[i];
2638       jj++;
2639     }
2640   }
2641   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2642   ierr = PetscFree(vals);CHKERRQ(ierr);
2643   ierr = PetscFree(mycols);CHKERRQ(ierr);
2644   ierr = PetscFree(rowners);CHKERRQ(ierr);
2645 
2646   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2647   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2648   *newmat = A;
2649   PetscFunctionReturn(0);
2650 }
2651 
2652 #undef __FUNCT__
2653 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2654 /*
2655     Not ideal since it makes two copies of the submatrix: first a SeqAIJ matrix on each
2656   process, and then the final result formed by concatenating the local matrices.
2657   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2658 */
2659 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2660 {
2661   PetscErrorCode ierr;
2662   PetscMPIInt    rank,size;
2663   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2664   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2665   Mat            *local,M,Mreuse;
2666   PetscScalar    *vwork,*aa;
2667   MPI_Comm       comm = mat->comm;
2668   Mat_SeqAIJ     *aij;
2669 
2670 
2671   PetscFunctionBegin;
2672   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2673   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2674 
2675   if (call ==  MAT_REUSE_MATRIX) {
2676     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2677     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2678     local = &Mreuse;
2679     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2680   } else {
2681     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2682     Mreuse = *local;
2683     ierr   = PetscFree(local);CHKERRQ(ierr);
2684   }
2685 
2686   /*
2687       m - number of local rows
2688       n - number of columns (same on all processors)
2689       rstart - first row in new global matrix generated
2690   */
2691   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2692   if (call == MAT_INITIAL_MATRIX) {
2693     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2694     ii  = aij->i;
2695     jj  = aij->j;
2696 
2697     /*
2698         Determine the number of non-zeros in the diagonal and off-diagonal
2699         portions of the matrix in order to do correct preallocation
2700     */
2701 
2702     /* first get start and end of "diagonal" columns */
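         /* If csize is PETSC_DECIDE the new column layout matches the new row layout for a square
            result, otherwise the columns are split as evenly as the rows; the caller may instead
            supply csize explicitly. */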
2703     if (csize == PETSC_DECIDE) {
2704       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2705       if (mglobal == n) { /* square matrix */
2706         nlocal = m;
2707       } else {
2708         nlocal = n/size + ((n % size) > rank);
2709       }
2710     } else {
2711       nlocal = csize;
2712     }
2713     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2714     rstart = rend - nlocal;
2715     if (rank == size - 1 && rend != n) {
2716       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2717     }
2718 
2719     /* next, compute all the lengths */
2720     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2721     olens = dlens + m;
2722     for (i=0; i<m; i++) {
2723       jend = ii[i+1] - ii[i];
2724       olen = 0;
2725       dlen = 0;
2726       for (j=0; j<jend; j++) {
2727         if (*jj < rstart || *jj >= rend) olen++;
2728         else dlen++;
2729         jj++;
2730       }
2731       olens[i] = olen;
2732       dlens[i] = dlen;
2733     }
2734     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2735     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2736     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2737     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2738     ierr = PetscFree(dlens);CHKERRQ(ierr);
2739   } else {
2740     PetscInt ml,nl;
2741 
2742     M = *newmat;
2743     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2744     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2745     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2746     /*
2747          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2748        rather than the slower MatSetValues().
2749     */
2750     M->was_assembled = PETSC_TRUE;
2751     M->assembled     = PETSC_FALSE;
2752   }
2753   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2754   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2755   ii  = aij->i;
2756   jj  = aij->j;
2757   aa  = aij->a;
2758   for (i=0; i<m; i++) {
2759     row   = rstart + i;
2760     nz    = ii[i+1] - ii[i];
2761     cwork = jj;     jj += nz;
2762     vwork = aa;     aa += nz;
2763     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2764   }
2765 
2766   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2767   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2768   *newmat = M;
2769 
2770   /* save submatrix used in processor for next request */
2771   if (call ==  MAT_INITIAL_MATRIX) {
2772     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2773     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2774   }
2775 
2776   PetscFunctionReturn(0);
2777 }
2778 
2779 EXTERN_C_BEGIN
2780 #undef __FUNCT__
2781 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2782 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2783 {
2784   PetscInt       m,cstart, cend,j,nnz,i,d;
2785   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2786   const PetscInt *JJ;
2787   PetscScalar    *values;
2788   PetscErrorCode ierr;
2789 
2790   PetscFunctionBegin;
2791   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2792 
2793   B->rmap.bs = B->cmap.bs = 1;
2794   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2795   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2796   m      = B->rmap.n;
2797   cstart = B->cmap.rstart;
2798   cend   = B->cmap.rend;
2799   rstart = B->rmap.rstart;
2800 
2801   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2802   o_nnz = d_nnz + m;
2803 
2804   for (i=0; i<m; i++) {
2805     nnz     = Ii[i+1]- Ii[i];
2806     JJ      = J + Ii[i];
2807     nnz_max = PetscMax(nnz_max,nnz);
2808     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2809     for (j=0; j<nnz; j++) {
2810       if (*JJ >= cstart) break;
2811       JJ++;
2812     }
2813     d = 0;
2814     for (; j<nnz; j++) {
2815       if (*JJ++ >= cend) break;
2816       d++;
2817     }
2818     d_nnz[i] = d;
2819     o_nnz[i] = nnz - d;
2820   }
2821   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2822   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2823 
2824   if (v) values = (PetscScalar*)v;
2825   else {
2826     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2827     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2828   }
2829 
2830   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2831   for (i=0; i<m; i++) {
2832     ii   = i + rstart;
2833     nnz  = Ii[i+1]- Ii[i];
2834     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2835   }
2836   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2837   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2838   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2839 
2840   if (!v) {
2841     ierr = PetscFree(values);CHKERRQ(ierr);
2842   }
2843   PetscFunctionReturn(0);
2844 }
2845 EXTERN_C_END
2846 
2847 #undef __FUNCT__
2848 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2849 /*@
2850    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2851    (the default parallel PETSc format).
2852 
2853    Collective on MPI_Comm
2854 
2855    Input Parameters:
2856 +  B - the matrix
2857 .  i - the indices into j for the start of each local row (starts with zero)
2858 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2859 -  v - optional values in the matrix
2860 
2861    Level: developer
2862 
2863    Notes: this routine copies the values from i[], j[], and v[] into PETSc's internal
2864      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2865 
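   Example usage (a minimal sketch; the two-row local CSR arrays below are illustrative only,
   error checking is omitted, and comm is assumed to be a valid MPI communicator):

.vb
      Mat         B;
      PetscInt    m   = 2;
      PetscInt    i[] = {0,2,4};
      PetscInt    j[] = {0,1, 1,3};
      PetscScalar v[] = {1.0,2.0, 3.0,4.0};

      MatCreate(comm,&B);
      MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
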
2866 .keywords: matrix, aij, compressed row, sparse, parallel
2867 
2868 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2869           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2870 @*/
2871 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2872 {
2873   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2874 
2875   PetscFunctionBegin;
2876   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2877   if (f) {
2878     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2879   }
2880   PetscFunctionReturn(0);
2881 }
2882 
2883 #undef __FUNCT__
2884 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2885 /*@C
2886    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2887    (the default parallel PETSc format).  For good matrix assembly performance
2888    the user should preallocate the matrix storage by setting the parameters
2889    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2890    performance can be increased by more than a factor of 50.
2891 
2892    Collective on MPI_Comm
2893 
2894    Input Parameters:
2895 +  A - the matrix
2896 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2897            (same value is used for all local rows)
2898 .  d_nnz - array containing the number of nonzeros in the various rows of the
2899            DIAGONAL portion of the local submatrix (possibly different for each row)
2900            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2901            The size of this array is equal to the number of local rows, i.e., 'm'.
2902            You must leave room for the diagonal entry even if it is zero.
2903 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2904            submatrix (same value is used for all local rows).
2905 -  o_nnz - array containing the number of nonzeros in the various rows of the
2906            OFF-DIAGONAL portion of the local submatrix (possibly different for
2907            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2908            structure. The size of this array is equal to the number
2909            of local rows, i.e., 'm'.
2910 
2911    If the *_nnz parameter is given then the *_nz parameter is ignored
2912 
2913    The AIJ format (also called the Yale sparse matrix format or
2914    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2915    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2916 
2917    The parallel matrix is partitioned such that the first m0 rows belong to
2918    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2919    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
2920 
2921    The DIAGONAL portion of the local submatrix of a processor can be defined
2922    as the submatrix which is obtained by extracting the part corresponding
2923    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2924    first row that belongs to the processor, and r2 is the last row belonging
2925    to this processor. This is a square mxm matrix. The remaining portion of
2926    the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2927 
2928    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2929 
2930    Example usage:
2931 
2932    Consider the following 8x8 matrix with 34 non-zero values that is
2933    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2934    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2935    as follows:
2936 
2937 .vb
2938             1  2  0  |  0  3  0  |  0  4
2939     Proc0   0  5  6  |  7  0  0  |  8  0
2940             9  0 10  | 11  0  0  | 12  0
2941     -------------------------------------
2942            13  0 14  | 15 16 17  |  0  0
2943     Proc1   0 18  0  | 19 20 21  |  0  0
2944             0  0  0  | 22 23  0  | 24  0
2945     -------------------------------------
2946     Proc2  25 26 27  |  0  0 28  | 29  0
2947            30  0  0  | 31 32 33  |  0 34
2948 .ve
2949 
2950    This can be represented as a collection of submatrices as:
2951 
2952 .vb
2953       A B C
2954       D E F
2955       G H I
2956 .ve
2957 
2958    Where the submatrices A,B,C are owned by proc0, D,E,F are
2959    owned by proc1, G,H,I are owned by proc2.
2960 
2961    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2962    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2963    The 'M','N' parameters are 8,8, and have the same values on all procs.
2964 
2965    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2966    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2967    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2968    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2969    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2970    matrix and [DF] as another SeqAIJ matrix.
2971 
2972    When d_nz, o_nz parameters are specified, d_nz storage elements are
2973    allocated for every row of the local diagonal submatrix, and o_nz
2974    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2975    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2976    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2977    In this case, the values of d_nz,o_nz are:
2978 .vb
2979      proc0 : dnz = 2, o_nz = 2
2980      proc1 : dnz = 3, o_nz = 2
2981      proc2 : dnz = 1, o_nz = 4
2982 .ve
2983    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2984    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2985    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2986    34 values.
2987 
2988    When d_nnz, o_nnz parameters are specified, the storage is specified
2989    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2990    In the above case the values for d_nnz,o_nnz are:
2991 .vb
2992      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2993      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2994      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2995 .ve
2996    Here the space allocated is the sum of all the above values, i.e., 34, and
2997    hence the preallocation is perfect.
2998 
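   As a concrete sketch, process 0 of the example above would call (error checking omitted;
   comm is assumed to be the communicator, and d_nnz, o_nnz hold the proc0 values listed above):

.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      MatCreate(comm,&A);
      MatSetSizes(A,3,3,8,8);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
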
2999    Level: intermediate
3000 
3001 .keywords: matrix, aij, compressed row, sparse, parallel
3002 
3003 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3004           MPIAIJ
3005 @*/
3006 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3007 {
3008   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3009 
3010   PetscFunctionBegin;
3011   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3012   if (f) {
3013     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3014   }
3015   PetscFunctionReturn(0);
3016 }
3017 
3018 #undef __FUNCT__
3019 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3020 /*@C
3021      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3022          in standard CSR format.
3023 
3024    Collective on MPI_Comm
3025 
3026    Input Parameters:
3027 +  comm - MPI communicator
3028 .  m - number of local rows (Cannot be PETSC_DECIDE)
3029 .  n - This value should be the same as the local size used in creating the
3030        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3031        calculated if N is given) For square matrices n is almost always m.
3032 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3033 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3034 .   i - row indices
3035 .   j - column indices
3036 -   a - matrix values
3037 
3038    Output Parameter:
3039 .   mat - the matrix
3040 
3041    Level: intermediate
3042 
3043    Notes:
3044        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3045      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3046      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3047 
3048        The i and j indices are 0 based
3049 
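   A minimal usage sketch (the two local rows, their CSR arrays, and the global column count of 4
   are illustrative only; error checking is omitted and comm is assumed to be a valid communicator):

.vb
      Mat         A;
      PetscInt    i[] = {0,2,3};
      PetscInt    j[] = {0,2, 1};
      PetscScalar a[] = {1.0,2.0, 3.0};

      MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,4,i,j,a,&A);
.ve
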
3050 .keywords: matrix, aij, compressed row, sparse, parallel
3051 
3052 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3053           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3054 @*/
3055 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3056 {
3057   PetscErrorCode ierr;
3058 
3059   PetscFunctionBegin;
3060   if (i[0]) {
3061     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3062   }
3063   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3064   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
       ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3065   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3066   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3067   PetscFunctionReturn(0);
3068 }
3069 
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatCreateMPIAIJ"
3072 /*@C
3073    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3074    (the default parallel PETSc format).  For good matrix assembly performance
3075    the user should preallocate the matrix storage by setting the parameters
3076    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3077    performance can be increased by more than a factor of 50.
3078 
3079    Collective on MPI_Comm
3080 
3081    Input Parameters:
3082 +  comm - MPI communicator
3083 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3084            This value should be the same as the local size used in creating the
3085            y vector for the matrix-vector product y = Ax.
3086 .  n - This value should be the same as the local size used in creating the
3087        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3088        calculated if N is given) For square matrices n is almost always m.
3089 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3090 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3091 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3092            (same value is used for all local rows)
3093 .  d_nnz - array containing the number of nonzeros in the various rows of the
3094            DIAGONAL portion of the local submatrix (possibly different for each row)
3095            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3096            The size of this array is equal to the number of local rows, i.e., 'm'.
3097            You must leave room for the diagonal entry even if it is zero.
3098 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3099            submatrix (same value is used for all local rows).
3100 -  o_nnz - array containing the number of nonzeros in the various rows of the
3101            OFF-DIAGONAL portion of the local submatrix (possibly different for
3102            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3103            structure. The size of this array is equal to the number
3104            of local rows, i.e., 'm'.
3105 
3106    Output Parameter:
3107 .  A - the matrix
3108 
3109    Notes:
3110    If the *_nnz parameter is given then the *_nz parameter is ignored
3111 
3112    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3113    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3114    storage requirements for this matrix.
3115 
3116    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3117    processor then it must be used on all processors that share the object for
3118    that argument.
3119 
3120    The user MUST specify either the local or global matrix dimensions
3121    (possibly both).
3122 
3123    The parallel matrix is partitioned such that the first m0 rows belong to
3124    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3125    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
3126 
3127    The DIAGONAL portion of the local submatrix of a processor can be defined
3128    as the submatrix which is obtained by extracting the part corresponding
3129    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3130    first row that belongs to the processor, and r2 is the last row belonging
3131    to this processor. This is a square mxm matrix. The remaining portion of
3132    the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3133 
3134    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3135 
3136    When calling this routine with a single process communicator, a matrix of
3137    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3138    type of communicator, use the construction mechanism:
3139      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3140 
3141    By default, this format uses inodes (identical nodes) when possible.
3142    We search for consecutive rows with the same nonzero structure, thereby
3143    reusing matrix information to achieve increased efficiency.
3144 
3145    Options Database Keys:
3146 +  -mat_no_inode  - Do not use inodes
3147 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3148 -  -mat_aij_oneindex - Internally use indexing starting at 1
3149         rather than 0.  Note that when calling MatSetValues(),
3150         the user still MUST index entries starting at 0!
3151 
3152 
3153    Example usage:
3154 
3155    Consider the following 8x8 matrix with 34 non-zero values that is
3156    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3157    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3158    as follows:
3159 
3160 .vb
3161             1  2  0  |  0  3  0  |  0  4
3162     Proc0   0  5  6  |  7  0  0  |  8  0
3163             9  0 10  | 11  0  0  | 12  0
3164     -------------------------------------
3165            13  0 14  | 15 16 17  |  0  0
3166     Proc1   0 18  0  | 19 20 21  |  0  0
3167             0  0  0  | 22 23  0  | 24  0
3168     -------------------------------------
3169     Proc2  25 26 27  |  0  0 28  | 29  0
3170            30  0  0  | 31 32 33  |  0 34
3171 .ve
3172 
3173    This can be represented as a collection of submatrices as:
3174 
3175 .vb
3176       A B C
3177       D E F
3178       G H I
3179 .ve
3180 
3181    Where the submatrices A,B,C are owned by proc0, D,E,F are
3182    owned by proc1, G,H,I are owned by proc2.
3183 
3184    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3185    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3186    The 'M','N' parameters are 8,8, and have the same values on all procs.
3187 
3188    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3189    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3190    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3191    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3192    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3193    matrix and [DF] as another SeqAIJ matrix.
3194 
3195    When d_nz, o_nz parameters are specified, d_nz storage elements are
3196    allocated for every row of the local diagonal submatrix, and o_nz
3197    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3198    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3199    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3200    In this case, the values of d_nz,o_nz are:
3201 .vb
3202      proc0 : dnz = 2, o_nz = 2
3203      proc1 : dnz = 3, o_nz = 2
3204      proc2 : dnz = 1, o_nz = 4
3205 .ve
3206    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3207    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3208    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3209    34 values.
3210 
3211    When d_nnz, o_nnz parameters are specified, the storage is specified
3212    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3213    In the above case the values for d_nnz,o_nnz are:
3214 .vb
3215      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3216      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3217      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3218 .ve
3219    Here the space allocated is the sum of all the above values, i.e., 34, and
3220    hence the preallocation is perfect.
3221 
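   Putting the example together, each process could create the matrix with a single call of the
   form below (a sketch; m, n, d_nnz and o_nnz hold that process's values listed above, and the
   numerical entries are then inserted with MatSetValues() before assembly):

.vb
      MatCreateMPIAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);
.ve
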
3222    Level: intermediate
3223 
3224 .keywords: matrix, aij, compressed row, sparse, parallel
3225 
3226 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3227           MPIAIJ, MatCreateMPIAIJWithArrays()
3228 @*/
3229 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3230 {
3231   PetscErrorCode ierr;
3232   PetscMPIInt    size;
3233 
3234   PetscFunctionBegin;
3235   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3236   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3237   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3238   if (size > 1) {
3239     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3240     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3241   } else {
3242     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3243     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3244   }
3245   PetscFunctionReturn(0);
3246 }
3247 
3248 #undef __FUNCT__
3249 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
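/*
   MatMPIAIJGetSeqAIJ - gives access to the pieces of an MPIAIJ matrix: Ad is the local DIAGONAL
   block and Ao the local OFF-DIAGONAL block (both stored as SeqAIJ matrices), while colmap[i] is
   the global column number of local column i of Ao. The returned objects are the internal
   objects of A; no copies are made.
*/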
3250 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3251 {
3252   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3253 
3254   PetscFunctionBegin;
3255   *Ad     = a->A;
3256   *Ao     = a->B;
3257   *colmap = a->garray;
3258   PetscFunctionReturn(0);
3259 }
3260 
3261 #undef __FUNCT__
3262 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3263 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3264 {
3265   PetscErrorCode ierr;
3266   PetscInt       i;
3267   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3268 
3269   PetscFunctionBegin;
3270   if (coloring->ctype == IS_COLORING_GLOBAL) {
3271     ISColoringValue *allcolors,*colors;
3272     ISColoring      ocoloring;
3273 
3274     /* set coloring for diagonal portion */
3275     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3276 
3277     /* set coloring for off-diagonal portion */
3278     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3279     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3280     for (i=0; i<a->B->cmap.n; i++) {
3281       colors[i] = allcolors[a->garray[i]];
3282     }
3283     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3284     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3285     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3286     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3287   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3288     ISColoringValue *colors;
3289     PetscInt        *larray;
3290     ISColoring      ocoloring;
3291 
3292     /* set coloring for diagonal portion */
3293     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3294     for (i=0; i<a->A->cmap.n; i++) {
3295       larray[i] = i + A->cmap.rstart;
3296     }
3297     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3298     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3299     for (i=0; i<a->A->cmap.n; i++) {
3300       colors[i] = coloring->colors[larray[i]];
3301     }
3302     ierr = PetscFree(larray);CHKERRQ(ierr);
3303     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3304     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3305     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3306 
3307     /* set coloring for off-diagonal portion */
3308     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3309     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3310     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3311     for (i=0; i<a->B->cmap.n; i++) {
3312       colors[i] = coloring->colors[larray[i]];
3313     }
3314     ierr = PetscFree(larray);CHKERRQ(ierr);
3315     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3316     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3317     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3318   } else {
3319     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3320   }
3321 
3322   PetscFunctionReturn(0);
3323 }
3324 
3325 #if defined(PETSC_HAVE_ADIC)
3326 #undef __FUNCT__
3327 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3328 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3329 {
3330   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3331   PetscErrorCode ierr;
3332 
3333   PetscFunctionBegin;
3334   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3335   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3336   PetscFunctionReturn(0);
3337 }
3338 #endif
3339 
3340 #undef __FUNCT__
3341 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3342 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3343 {
3344   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3345   PetscErrorCode ierr;
3346 
3347   PetscFunctionBegin;
3348   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3349   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3350   PetscFunctionReturn(0);
3351 }
3352 
3353 #undef __FUNCT__
3354 #define __FUNCT__ "MatMerge"
3355 /*@C
3356       MatMerge - Creates a single large parallel PETSc matrix by concatenating the sequential
3357                  matrices from each processor
3358 
3359     Collective on MPI_Comm
3360 
3361    Input Parameters:
3362 +    comm - the communicator the parallel matrix will live on
3363 .    inmat - the input sequential matrix (one per processor)
3364 .    n - number of local columns (or PETSC_DECIDE)
3365 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3366 
3367    Output Parameter:
3368 .    outmat - the parallel matrix generated
3369 
3370     Level: advanced
3371 
3372    Notes: The number of columns of the matrix in EACH processor MUST be the same. The input
           matrix inmat is destroyed by this routine once its values have been copied into outmat.
3373 
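   A usage sketch (assumes each process has already assembled its own SeqAIJ matrix seq with the
   same number of columns; recall that seq is destroyed by the call):

.vb
      Mat seq,par;
      MatMerge(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);
.ve
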
3374 @*/
3375 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3376 {
3377   PetscErrorCode ierr;
3378   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3379   PetscInt       *indx;
3380   PetscScalar    *values;
3381 
3382   PetscFunctionBegin;
3383   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3384   if (scall == MAT_INITIAL_MATRIX){
3385     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3386     if (n == PETSC_DECIDE){
3387       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3388     }
3389     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3390     rstart -= m;
3391 
3392     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3393     for (i=0;i<m;i++) {
3394       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3395       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3396       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3397     }
3398     /* This routine will ONLY return MPIAIJ type matrix */
3399     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3400     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3401     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3402     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3403     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3404 
3405   } else if (scall == MAT_REUSE_MATRIX){
3406     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3407   } else {
3408     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3409   }
3410 
3411   for (i=0;i<m;i++) {
3412     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3413     Ii    = i + rstart;
3414     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3415     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3416   }
3417   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3418   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3419   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3420 
3421   PetscFunctionReturn(0);
3422 }
3423 
3424 #undef __FUNCT__
3425 #define __FUNCT__ "MatFileSplit"
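/*
   MatFileSplit - copies the local rows of A into a sequential m x N matrix on each process and
   appends that matrix to the binary viewer file <outfile>.<rank>, where rank is the rank of the
   process in A's communicator.
*/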
3426 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3427 {
3428   PetscErrorCode    ierr;
3429   PetscMPIInt       rank;
3430   PetscInt          m,N,i,rstart,nnz;
3431   size_t            len;
3432   const PetscInt    *indx;
3433   PetscViewer       out;
3434   char              *name;
3435   Mat               B;
3436   const PetscScalar *values;
3437 
3438   PetscFunctionBegin;
3439   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3440   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3441   /* Should this be the type of the diagonal block of A? */
3442   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3443   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3444   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3445   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3446   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3447   for (i=0;i<m;i++) {
3448     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3449     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3450     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3451   }
3452   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3453   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3454 
3455   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3456   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3457   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3458   sprintf(name,"%s.%d",outfile,rank);
3459   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3460   ierr = PetscFree(name);CHKERRQ(ierr);
3461   ierr = MatView(B,out);CHKERRQ(ierr);
3462   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3463   ierr = MatDestroy(B);CHKERRQ(ierr);
3464   PetscFunctionReturn(0);
3465 }
3466 
3467 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3468 #undef __FUNCT__
3469 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3470 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3471 {
3472   PetscErrorCode       ierr;
3473   Mat_Merge_SeqsToMPI  *merge;
3474   PetscContainer       container;
3475 
3476   PetscFunctionBegin;
3477   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3478   if (container) {
3479     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3480     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3481     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3482     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3483     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3484     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3485     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3486     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3487     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3488     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3489     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3490     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3491 
3492     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3493     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3494     ierr = PetscFree(merge);CHKERRQ(ierr);
3495   }
3496 
3497   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3498   PetscFunctionReturn(0);
3499 }
3500 
3501 #include "src/mat/utils/freespace.h"
3502 #include "petscbt.h"
3503 static PetscEvent logkey_seqstompinum = 0;
3504 #undef __FUNCT__
3505 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3506 /*@C
3507       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential
3508                  matrices from each processor
3509 
3510     Collective on MPI_Comm
3511 
3512    Input Parameters:
3513 +    comm - the communicator the parallel matrix will live on
3514 .    seqmat - the input sequential matrices
3515 .    m - number of local rows (or PETSC_DECIDE)
3516 .    n - number of local columns (or PETSC_DECIDE)
3517 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3518 
3519    Output Parameter:
3520 .    mpimat - the parallel matrix generated
3521 
3522     Level: advanced
3523 
3524    Notes:
3525      The dimensions of the sequential matrix in each processor MUST be the same.
3526      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3527      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
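
     A sketch of the intended calling pattern (the symbolic phase is performed once by
     MatMerge_SeqsToMPI() with MAT_INITIAL_MATRIX, after which the numeric phase can be repeated
     whenever the values of seqmat change but its nonzero pattern does not):

.vb
      Mat seqmat,mpimat;
      MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);
      MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);
.ve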
3528 @*/
3529 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3530 {
3531   PetscErrorCode       ierr;
3532   MPI_Comm             comm=mpimat->comm;
3533   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3534   PetscMPIInt          size,rank,taga,*len_s;
3535   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3536   PetscInt             proc,m;
3537   PetscInt             **buf_ri,**buf_rj;
3538   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3539   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3540   MPI_Request          *s_waits,*r_waits;
3541   MPI_Status           *status;
3542   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3543   Mat_Merge_SeqsToMPI  *merge;
3544   PetscContainer       container;
3545 
3546   PetscFunctionBegin;
3547   if (!logkey_seqstompinum) {
3548     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3549   }
3550   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3551 
3552   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3553   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3554 
3555   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3556   if (container) {
3557     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3558   } else SETERRQ(PETSC_ERR_PLIB,"Matrix mpimat was not created by MatMerge_SeqsToMPISymbolic()");
3559   bi     = merge->bi;
3560   bj     = merge->bj;
3561   buf_ri = merge->buf_ri;
3562   buf_rj = merge->buf_rj;
3563 
3564   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3565   owners = merge->rowmap.range;
3566   len_s  = merge->len_s;
3567 
3568   /* send and recv matrix values */
3569   /*-----------------------------*/
3570   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3571   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3572 
3573   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3574   for (proc=0,k=0; proc<size; proc++){
3575     if (!len_s[proc]) continue;
3576     i = owners[proc];
3577     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3578     k++;
3579   }
3580 
3581   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3582   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3583   ierr = PetscFree(status);CHKERRQ(ierr);
3584 
3585   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3586   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3587 
3588   /* insert mat values of mpimat */
3589   /*----------------------------*/
3590   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3591   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3592   nextrow = buf_ri_k + merge->nrecv;
3593   nextai  = nextrow + merge->nrecv;
3594 
3595   for (k=0; k<merge->nrecv; k++){
3596     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3597     nrows = *(buf_ri_k[k]);
3598     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3599     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
3600   }
3601 
3602   /* set values of ba */
3603   m = merge->rowmap.n;
3604   for (i=0; i<m; i++) {
3605     arow = owners[rank] + i;
3606     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3607     bnzi = bi[i+1] - bi[i];
3608     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3609 
3610     /* add local non-zero vals of this proc's seqmat into ba */
3611     anzi = ai[arow+1] - ai[arow];
3612     aj   = a->j + ai[arow];
3613     aa   = a->a + ai[arow];
3614     nextaj = 0;
3615     for (j=0; nextaj<anzi; j++){
3616       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3617         ba_i[j] += aa[nextaj++];
3618       }
3619     }
3620 
3621     /* add received vals into ba */
3622     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3623       /* i-th row */
3624       if (i == *nextrow[k]) {
3625         anzi = *(nextai[k]+1) - *nextai[k];
3626         aj   = buf_rj[k] + *(nextai[k]);
3627         aa   = abuf_r[k] + *(nextai[k]);
3628         nextaj = 0;
3629         for (j=0; nextaj<anzi; j++){
3630           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3631             ba_i[j] += aa[nextaj++];
3632           }
3633         }
3634         nextrow[k]++; nextai[k]++;
3635       }
3636     }
3637     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3638   }
3639   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3640   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3641 
3642   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3643   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3644   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3645   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3646   PetscFunctionReturn(0);
3647 }
3648 
3649 static PetscEvent logkey_seqstompisym = 0;
3650 #undef __FUNCT__
3651 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3652 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3653 {
3654   PetscErrorCode       ierr;
3655   Mat                  B_mpi;
3656   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3657   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3658   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3659   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3660   PetscInt             len,proc,*dnz,*onz;
3661   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3662   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3663   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3664   MPI_Status           *status;
3665   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3666   PetscBT              lnkbt;
3667   Mat_Merge_SeqsToMPI  *merge;
3668   PetscContainer       container;
3669 
3670   PetscFunctionBegin;
3671   if (!logkey_seqstompisym) {
3672     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3673   }
3674   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3675 
3676   /* make sure it is a PETSc comm */
3677   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3678   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3679   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3680 
3681   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3682   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3683 
3684   /* determine row ownership */
3685   /*---------------------------------------------------------*/
3686   merge->rowmap.n = m;
3687   merge->rowmap.N = M;
3688   merge->rowmap.bs = 1;
3689   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3690   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3691   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3692 
3693   m      = merge->rowmap.n;
3694   M      = merge->rowmap.N;
3695   owners = merge->rowmap.range;
3696 
3697   /* determine the number of messages to send, their lengths */
3698   /*---------------------------------------------------------*/
3699   len_s  = merge->len_s;
3700 
3701   len = 0;  /* length of buf_si[] */
3702   merge->nsend = 0;
3703   for (proc=0; proc<size; proc++){
3704     len_si[proc] = 0;
3705     if (proc == rank){
3706       len_s[proc] = 0;
3707     } else {
3708       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3709       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3710     }
3711     if (len_s[proc]) {
3712       merge->nsend++;
3713       nrows = 0;
3714       for (i=owners[proc]; i<owners[proc+1]; i++){
3715         if (ai[i+1] > ai[i]) nrows++;
3716       }
3717       len_si[proc] = 2*(nrows+1);
3718       len += len_si[proc];
3719     }
3720   }
3721 
3722   /* determine the number and length of messages to receive for ij-structure */
3723   /*-------------------------------------------------------------------------*/
3724   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3725   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3726 
3727   /* post the Irecv of j-structure */
3728   /*-------------------------------*/
3729   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3730   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3731 
3732   /* post the Isend of j-structure */
3733   /*--------------------------------*/
3734   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3735   sj_waits = si_waits + merge->nsend;
3736 
3737   for (proc=0, k=0; proc<size; proc++){
3738     if (!len_s[proc]) continue;
3739     i = owners[proc];
3740     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3741     k++;
3742   }
3743 
3744   /* receives and sends of j-structure are complete */
3745   /*------------------------------------------------*/
3746   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3747   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3748 
3749   /* send and recv i-structure */
3750   /*---------------------------*/
3751   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3752   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3753 
3754   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3755   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3756   for (proc=0,k=0; proc<size; proc++){
3757     if (!len_s[proc]) continue;
3758     /* form outgoing message for i-structure:
3759          buf_si[0]:                 nrows to be sent
3760                [1:nrows]:           row index (global)
3761                [nrows+1:2*nrows+1]: i-structure index
3762     */
3763     /*-------------------------------------------*/
3764     nrows = len_si[proc]/2 - 1;
3765     buf_si_i    = buf_si + nrows+1;
3766     buf_si[0]   = nrows;
3767     buf_si_i[0] = 0;
3768     nrows = 0;
3769     for (i=owners[proc]; i<owners[proc+1]; i++){
3770       anzi = ai[i+1] - ai[i];
3771       if (anzi) {
3772         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3773         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3774         nrows++;
3775       }
3776     }
3777     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3778     k++;
3779     buf_si += len_si[proc];
3780   }
3781 
3782   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3783   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3784 
3785   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3786   for (i=0; i<merge->nrecv; i++){
3787     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3788   }
3789 
3790   ierr = PetscFree(len_si);CHKERRQ(ierr);
3791   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3792   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3793   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3794   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3795   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3796   ierr = PetscFree(status);CHKERRQ(ierr);
3797 
3798   /* compute a local seq matrix in each processor */
3799   /*----------------------------------------------*/
3800   /* allocate bi array and free space for accumulating nonzero column info */
3801   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3802   bi[0] = 0;
3803 
3804   /* create and initialize a linked list */
3805   nlnk = N+1;
3806   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3807 
3808   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3810   len  = ai[owners[rank+1]] - ai[owners[rank]];
3811   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3812   current_space = free_space;
3813 
3814   /* determine symbolic info for each local row */
3815   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3816   nextrow = buf_ri_k + merge->nrecv;
3817   nextai  = nextrow + merge->nrecv;
3818   for (k=0; k<merge->nrecv; k++){
3819     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3820     nrows = *buf_ri_k[k];
3821     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3822     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
3823   }
3824 
3825   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3826   len = 0;
3827   for (i=0;i<m;i++) {
3828     bnzi   = 0;
3829     /* add local non-zero cols of this proc's seqmat into lnk */
3830     arow   = owners[rank] + i;
3831     anzi   = ai[arow+1] - ai[arow];
3832     aj     = a->j + ai[arow];
3833     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3834     bnzi += nlnk;
3835     /* add received col data into lnk */
3836     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3837       if (i == *nextrow[k]) { /* i-th row */
3838         anzi = *(nextai[k]+1) - *nextai[k];
3839         aj   = buf_rj[k] + *nextai[k];
3840         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3841         bnzi += nlnk;
3842         nextrow[k]++; nextai[k]++;
3843       }
3844     }
3845     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3846 
3847     /* if free space is not available, make more free space */
3848     if (current_space->local_remaining<bnzi) {
3849       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3850       nspacedouble++;
3851     }
3852     /* copy data into free space, then initialize lnk */
3853     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3854     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3855 
3856     current_space->array           += bnzi;
3857     current_space->local_used      += bnzi;
3858     current_space->local_remaining -= bnzi;
3859 
3860     bi[i+1] = bi[i] + bnzi;
3861   }
3862 
3863   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3864 
3865   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3866   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3867   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3868 
3869   /* create symbolic parallel matrix B_mpi */
3870   /*---------------------------------------*/
3871   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3872   if (n==PETSC_DECIDE) {
3873     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3874   } else {
3875     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3876   }
3877   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3878   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3879   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3880 
3881   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3882   B_mpi->assembled     = PETSC_FALSE;
3883   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3884   merge->bi            = bi;
3885   merge->bj            = bj;
3886   merge->buf_ri        = buf_ri;
3887   merge->buf_rj        = buf_rj;
3888   merge->coi           = PETSC_NULL;
3889   merge->coj           = PETSC_NULL;
3890   merge->owners_co     = PETSC_NULL;
3891 
3892   /* attach the supporting struct to B_mpi for reuse */
3893   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3894   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3895   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3896   *mpimat = B_mpi;
3897 
3898   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3899   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3900   PetscFunctionReturn(0);
3901 }
3902 
3903 static PetscEvent logkey_seqstompi = 0;
3904 #undef __FUNCT__
3905 #define __FUNCT__ "MatMerge_SeqsToMPI"
3906 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3907 {
3908   PetscErrorCode   ierr;
3909 
3910   PetscFunctionBegin;
3911   if (!logkey_seqstompi) {
3912     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3913   }
3914   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3915   if (scall == MAT_INITIAL_MATRIX){
3916     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3917   }
3918   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3919   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3920   PetscFunctionReturn(0);
3921 }
3922 static PetscEvent logkey_getlocalmat = 0;
3923 #undef __FUNCT__
3924 #define __FUNCT__ "MatGetLocalMat"
3925 /*@C
3926      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A (with global column indices)
3927 
3928     Not Collective
3929 
3930    Input Parameters:
3931 +    A - the matrix
3932 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3933 
3934    Output Parameter:
3935 .    A_loc - the local sequential matrix generated
3936 
3937     Level: developer
3938 
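   A usage sketch (the first call builds A_loc; the second refreshes its numerical values after
   A has changed but its nonzero pattern has not):

.vb
      Mat A_loc;
      MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      MatDestroy(A_loc);
.ve
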
3939 @*/
3940 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3941 {
3942   PetscErrorCode  ierr;
3943   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3944   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3945   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3946   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3947   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3948   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3949 
3950   PetscFunctionBegin;
3951   if (!logkey_getlocalmat) {
3952     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3953   }
3954   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3955   if (scall == MAT_INITIAL_MATRIX){
3956     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3957     ci[0] = 0;
3958     for (i=0; i<am; i++){
3959       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3960     }
3961     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3962     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3963     k = 0;
3964     for (i=0; i<am; i++) {
3965       ncols_o = bi[i+1] - bi[i];
3966       ncols_d = ai[i+1] - ai[i];
3967       /* off-diagonal portion of A */
3968       for (jo=0; jo<ncols_o; jo++) {
3969         col = cmap[*bj];
3970         if (col >= cstart) break;
3971         cj[k]   = col; bj++;
3972         ca[k++] = *ba++;
3973       }
3974       /* diagonal portion of A */
3975       for (j=0; j<ncols_d; j++) {
3976         cj[k]   = cstart + *aj++;
3977         ca[k++] = *aa++;
3978       }
3979       /* off-diagonal portion of A */
3980       for (j=jo; j<ncols_o; j++) {
3981         cj[k]   = cmap[*bj++];
3982         ca[k++] = *ba++;
3983       }
3984     }
3985     /* put together the new matrix */
3986     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3987     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3988     /* Since these are PETSc arrays, change flags to free them as necessary. */
3989     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3990     mat->free_a  = PETSC_TRUE;
3991     mat->free_ij = PETSC_TRUE;
3992     mat->nonew   = 0;
3993   } else if (scall == MAT_REUSE_MATRIX){
3994     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3995     ci = mat->i; cj = mat->j; ca = mat->a;
3996     for (i=0; i<am; i++) {
3997       /* off-diagonal portion of A */
3998       ncols_o = bi[i+1] - bi[i];
3999       for (jo=0; jo<ncols_o; jo++) {
4000         col = cmap[*bj];
4001         if (col >= cstart) break;
4002         *ca++ = *ba++; bj++;
4003       }
4004       /* diagonal portion of A */
4005       ncols_d = ai[i+1] - ai[i];
4006       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4007       /* off-diagonal portion of A */
4008       for (j=jo; j<ncols_o; j++) {
4009         *ca++ = *ba++; bj++;
4010       }
4011     }
4012   } else {
4013     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4014   }
4015 
4016   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4017   PetscFunctionReturn(0);
4018 }
4019 
4020 static PetscEvent logkey_getlocalmatcondensed = 0;
4021 #undef __FUNCT__
4022 #define __FUNCT__ "MatGetLocalMatCondensed"
4023 /*@C
4024      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and NON-ZERO columns of A
4025 
4026     Not Collective
4027 
4028    Input Parameters:
4029 +    A - the matrix
4030 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4031 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4032 
4033    Output Parameter:
4034 .    A_loc - the local sequential matrix generated
4035 
4036     Level: developer
4037 
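   A usage sketch (passing PETSC_NULL for row and col lets the routine build the index sets itself):

.vb
      Mat A_loc;
      MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
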
4038 @*/
4039 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4040 {
4041   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4042   PetscErrorCode    ierr;
4043   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4044   IS                isrowa,iscola;
4045   Mat               *aloc;
4046 
4047   PetscFunctionBegin;
4048   if (!logkey_getlocalmatcondensed) {
4049     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
4050   }
4051   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4052   if (!row){
4053     start = A->rmap.rstart; end = A->rmap.rend;
4054     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4055   } else {
4056     isrowa = *row;
4057   }
4058   if (!col){
4059     start = A->cmap.rstart;
4060     cmap  = a->garray;
4061     nzA   = a->A->cmap.n;
4062     nzB   = a->B->cmap.n;
4063     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4064     ncols = 0;
4065     for (i=0; i<nzB; i++) {
4066       if (cmap[i] < start) idx[ncols++] = cmap[i];
4067       else break;
4068     }
4069     imark = i;
4070     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4071     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4072     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4073     ierr = PetscFree(idx);CHKERRQ(ierr);
4074   } else {
4075     iscola = *col;
4076   }
4077   if (scall != MAT_INITIAL_MATRIX){
4078     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4079     aloc[0] = *A_loc;
4080   }
4081   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4082   *A_loc = aloc[0];
4083   ierr = PetscFree(aloc);CHKERRQ(ierr);
4084   if (!row){
4085     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4086   }
4087   if (!col){
4088     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4089   }
4090   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4091   PetscFunctionReturn(0);
4092 }
4093 
4094 static PetscEvent logkey_GetBrowsOfAcols = 0;
4095 #undef __FUNCT__
4096 #define __FUNCT__ "MatGetBrowsOfAcols"
4097 /*@C
4098     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4099 
4100     Collective on Mat
4101 
4102    Input Parameters:
4103 +    A,B - the matrices in mpiaij format
4104 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4105 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4106 
4107    Output Parameters:
4108 +    rowb, colb - index sets of rows and columns of B to extract
4109 .    brstart - row index of B_seq at which the next B->rmap.n rows are taken from B's own local rows
4110 -    B_seq - the sequential matrix generated
4111 
4112     Level: developer
4113 
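    Example usage, as a minimal sketch: error checking is trimmed and the
    names A, B, Bseq, and brstart are illustrative.

      Mat      Bseq;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&brstart,&Bseq);CHKERRQ(ierr);
      /* ... use Bseq; rows 0..brstart-1 precede B's own local rows ... */
      ierr = MatDestroy(Bseq);CHKERRQ(ierr);
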
4114 @*/
4115 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4116 {
4117   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4118   PetscErrorCode    ierr;
4119   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4120   IS                isrowb,iscolb;
4121   Mat               *bseq;
4122 
4123   PetscFunctionBegin;
4124   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4125     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4126   }
4127   if (!logkey_GetBrowsOfAcols) {
4128     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
4129   }
4130   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4131 
4132   if (scall == MAT_INITIAL_MATRIX){
4133     start = A->cmap.rstart;
4134     cmap  = a->garray;
4135     nzA   = a->A->cmap.n;
4136     nzB   = a->B->cmap.n;
4137     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4138     ncols = 0;
4139     for (i=0; i<nzB; i++) {  /* row < local row index */
4140       if (cmap[i] < start) idx[ncols++] = cmap[i];
4141       else break;
4142     }
4143     imark = i;
4144     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4145     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4146     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4147     ierr = PetscFree(idx);CHKERRQ(ierr);
4148     *brstart = imark;
4149     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4150   } else {
4151     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4152     isrowb = *rowb; iscolb = *colb;
4153     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4154     bseq[0] = *B_seq;
4155   }
4156   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4157   *B_seq = bseq[0];
4158   ierr = PetscFree(bseq);CHKERRQ(ierr);
4159   if (!rowb){
4160     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4161   } else {
4162     *rowb = isrowb;
4163   }
4164   if (!colb){
4165     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4166   } else {
4167     *colb = iscolb;
4168   }
4169   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4170   PetscFunctionReturn(0);
4171 }
4172 
4173 static PetscEvent logkey_GetBrowsOfAocols = 0;
4174 #undef __FUNCT__
4175 #define __FUNCT__ "MatGetBrowsOfAoCols"
4176 /*@C
4177     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4178     of the OFF-DIAGONAL portion of the local part of A
4179 
4180     Collective on Mat
4181 
4182    Input Parameters:
4183 +    A,B - the matrices in mpiaij format
4184 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4185 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4186 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4187 
4188    Output Parameter:
4189 .    B_oth - the sequential matrix generated
4190 
4191     Level: developer
4192 
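    Example usage, as a minimal sketch: error checking is trimmed and the
    names A, B, B_oth, startsj, and bufa are illustrative.

      Mat         B_oth;
      PetscInt    *startsj = PETSC_NULL;
      PetscScalar *bufa    = PETSC_NULL;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      /* ... after B's values (but not its nonzero pattern) change ... */
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
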
4193 @*/
4194 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4195 {
4196   VecScatter_MPI_General *gen_to,*gen_from;
4197   PetscErrorCode         ierr;
4198   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4199   Mat_SeqAIJ             *b_oth;
4200   VecScatter             ctx=a->Mvctx;
4201   MPI_Comm               comm=ctx->comm;
4202   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4203   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4204   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4205   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4206   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4207   MPI_Status             *sstatus,rstatus;
4208   PetscInt               *cols,sbs,rbs;
4209   PetscScalar            *vals;
4210 
4211   PetscFunctionBegin;
4212   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4213     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4214   }
4215   if (!logkey_GetBrowsOfAocols) {
4216     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
4217   }
4218   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4219   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4220 
4221   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4222   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4223   rvalues  = gen_from->values; /* reused to hold the lengths of incoming rows */
4224   svalues  = gen_to->values;   /* reused to hold the lengths of outgoing rows */
4225   nrecvs   = gen_from->n;
4226   nsends   = gen_to->n;
4227 
4228   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4229   srow     = gen_to->indices;   /* local row index to be sent */
4230   sstarts  = gen_to->starts;
4231   sprocs   = gen_to->procs;
4232   sstatus  = gen_to->sstatus;
4233   sbs      = gen_to->bs;
4234   rstarts  = gen_from->starts;
4235   rprocs   = gen_from->procs;
4236   rbs      = gen_from->bs;
4237 
4238   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
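  /* The rows of B needed on this process are gathered in up to three message rounds
     that follow the Mvctx scatter pattern: (1) row lengths, to build the i-array of
     B_oth; (2) global column indices, for its j-array; (3) numerical values, for its
     a-array.  With MAT_REUSE_MATRIX the first two rounds are skipped and only the
     values are exchanged again. */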
4239   if (scall == MAT_INITIAL_MATRIX){
4240     /* i-array */
4241     /*---------*/
4242     /*  post receives */
4243     for (i=0; i<nrecvs; i++){
4244       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4245       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4246       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4247     }
4248 
4249     /* pack the outgoing message */
4250     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4251     rstartsj = sstartsj + nsends +1;
4252     sstartsj[0] = 0;  rstartsj[0] = 0;
4253     len = 0; /* total length of j or a array to be sent */
4254     k = 0;
4255     for (i=0; i<nsends; i++){
4256       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4257       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4258       for (j=0; j<nrows; j++) {
4259         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4260         for (l=0; l<sbs; l++){
4261           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4262           rowlen[j*sbs+l] = ncols;
4263           len += ncols;
4264           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4265         }
4266         k++;
4267       }
4268       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4269       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4270     }
4271     /* recvs and sends of i-array are completed */
4272     i = nrecvs;
4273     while (i--) {
4274       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4275     }
4276     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4277 
4278     /* allocate buffers for sending j and a arrays */
4279     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4280     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4281 
4282     /* create i-array of B_oth */
4283     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4284     b_othi[0] = 0;
4285     len = 0; /* total length of j or a array to be received */
4286     k = 0;
4287     for (i=0; i<nrecvs; i++){
4288       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4289       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4290       for (j=0; j<nrows; j++) {
4291         b_othi[k+1] = b_othi[k] + rowlen[j];
4292         len += rowlen[j]; k++;
4293       }
4294       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4295     }
4296 
4297     /* allocate space for j and a arrays of B_oth */
4298     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4299     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4300 
4301     /* j-array */
4302     /*---------*/
4303     /*  post receives of j-array */
4304     for (i=0; i<nrecvs; i++){
4305       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4306       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4307     }
4308 
4309     /* pack the outgoing message j-array */
4310     k = 0;
4311     for (i=0; i<nsends; i++){
4312       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4313       bufJ = bufj+sstartsj[i];
4314       for (j=0; j<nrows; j++) {
4315         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4316         for (ll=0; ll<sbs; ll++){
4317           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4318           for (l=0; l<ncols; l++){
4319             *bufJ++ = cols[l];
4320           }
4321           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4322         }
4323       }
4324       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4325     }
4326 
4327     /* recvs and sends of j-array are completed */
4328     i = nrecvs;
4329     while (i--) {
4330       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4331     }
4332     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4333   } else if (scall == MAT_REUSE_MATRIX){
4334     sstartsj = *startsj;
4335     rstartsj = sstartsj + nsends +1;
4336     bufa     = *bufa_ptr;
4337     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4338     b_otha   = b_oth->a;
4339   } else {
4340     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4341   }
4342 
4343   /* a-array */
4344   /*---------*/
4345   /*  post receives of a-array */
4346   for (i=0; i<nrecvs; i++){
4347     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4348     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4349   }
4350 
4351   /* pack the outgoing message a-array */
4352   k = 0;
4353   for (i=0; i<nsends; i++){
4354     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4355     bufA = bufa+sstartsj[i];
4356     for (j=0; j<nrows; j++) {
4357       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4358       for (ll=0; ll<sbs; ll++){
4359         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4360         for (l=0; l<ncols; l++){
4361           *bufA++ = vals[l];
4362         }
4363         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4364       }
4365     }
4366     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4367   }
4368   /* recvs and sends of a-array are completed */
4369   i = nrecvs;
4370   while (i--) {
4371     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4372   }
4373   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4374   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4375 
4376   if (scall == MAT_INITIAL_MATRIX){
4377     /* put together the new matrix */
4378     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4379 
4380     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4381     /* Since these are PETSc arrays, change flags to free them as necessary. */
4382     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4383     b_oth->free_a  = PETSC_TRUE;
4384     b_oth->free_ij = PETSC_TRUE;
4385     b_oth->nonew   = 0;
4386 
4387     ierr = PetscFree(bufj);CHKERRQ(ierr);
4388     if (!startsj || !bufa_ptr){
4389       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4390       ierr = PetscFree(bufa);CHKERRQ(ierr);
4391     } else {
4392       *startsj  = sstartsj;
4393       *bufa_ptr = bufa;
4394     }
4395   }
4396   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4397   PetscFunctionReturn(0);
4398 }
4399 
4400 #undef __FUNCT__
4401 #define __FUNCT__ "MatGetCommunicationStructs"
4402 /*@C
4403   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4404 
4405   Not Collective
4406 
4407   Input Parameters:
4408 . A - The matrix in mpiaij format
4409 
4410   Output Parameters:
4411 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4412 . colmap - A map from global column index to local index into lvec
4413 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4414 
4415   Level: developer
4416 
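  Example usage, as a minimal sketch: error checking is trimmed and the names
  are illustrative.

    Vec        lvec;
    VecScatter scat;
#if defined (PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
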
4417 @*/
4418 #if defined (PETSC_USE_CTABLE)
4419 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4420 #else
4421 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4422 #endif
4423 {
4424   Mat_MPIAIJ *a;
4425 
4426   PetscFunctionBegin;
4427   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4428   PetscValidPointer(lvec, 2);
4429   PetscValidPointer(colmap, 3);
4430   PetscValidPointer(multScatter, 4);
4431   a = (Mat_MPIAIJ *) A->data;
4432   if (lvec) *lvec = a->lvec;
4433   if (colmap) *colmap = a->colmap;
4434   if (multScatter) *multScatter = a->Mvctx;
4435   PetscFunctionReturn(0);
4436 }
4437 
4438 EXTERN_C_BEGIN
4439 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4440 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4441 EXTERN_C_END
4442 
4443 /*MC
4444    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4445 
4446    Options Database Keys:
4447 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4448 
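  Example usage, as a minimal sketch: error checking is trimmed and comm, m, n,
  M, N, d_nz, and o_nz are illustrative names and sizes.

    Mat A;
    ierr = MatCreate(comm,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);  /* or -mat_type mpiaij with MatSetFromOptions() */
    ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
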
4449   Level: beginner
4450 
4451 .seealso: MatCreateMPIAIJ
4452 M*/
4453 
4454 EXTERN_C_BEGIN
4455 #undef __FUNCT__
4456 #define __FUNCT__ "MatCreate_MPIAIJ"
4457 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4458 {
4459   Mat_MPIAIJ     *b;
4460   PetscErrorCode ierr;
4461   PetscMPIInt    size;
4462 
4463   PetscFunctionBegin;
4464   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4465 
4466   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
4467   B->data         = (void*)b;
4468   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4469   B->factor       = 0;
4470   B->rmap.bs      = 1;
4471   B->assembled    = PETSC_FALSE;
4472   B->mapping      = 0;
4473 
4474   B->insertmode      = NOT_SET_VALUES;
4475   b->size            = size;
4476   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4477 
4478   /* build cache for off array entries formed */
4479   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4480   b->donotstash  = PETSC_FALSE;
4481   b->colmap      = 0;
4482   b->garray      = 0;
4483   b->roworiented = PETSC_TRUE;
4484 
4485   /* stuff used for matrix vector multiply */
4486   b->lvec      = PETSC_NULL;
4487   b->Mvctx     = PETSC_NULL;
4488 
4489   /* stuff for MatGetRow() */
4490   b->rowindices   = 0;
4491   b->rowvalues    = 0;
4492   b->getrowactive = PETSC_FALSE;
4493 
4494 
4495   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4496                                      "MatStoreValues_MPIAIJ",
4497                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4499                                      "MatRetrieveValues_MPIAIJ",
4500                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4502 				     "MatGetDiagonalBlock_MPIAIJ",
4503                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4504   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4505 				     "MatIsTranspose_MPIAIJ",
4506 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4508 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4509 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4511 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4512 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4514 				     "MatDiagonalScaleLocal_MPIAIJ",
4515 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4516   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4517                                      "MatConvert_MPIAIJ_MPICSRPERM",
4518                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4520                                      "MatConvert_MPIAIJ_MPICRL",
4521                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4522   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4523   PetscFunctionReturn(0);
4524 }
4525 EXTERN_C_END
4526 
4527 #undef __FUNCT__
4528 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4529 /*@C
4530      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4531          and "off-diagonal" part of the matrix in CSR format.
4532 
4533    Collective on MPI_Comm
4534 
4535    Input Parameters:
4536 +  comm - MPI communicator
4537 .  m - number of local rows (Cannot be PETSC_DECIDE)
4538 .  n - number of local columns. This value should be the same as the local size used in creating the
4539        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4540        calculated if N is given). For square matrices n is almost always m.
4541 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4542 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4543 .   i - row indices for "diagonal" portion of matrix
4544 .   j - column indices
4545 .   a - matrix values
4546 .   oi - row indices for "off-diagonal" portion of matrix
4547 .   oj - column indices
4548 -   oa - matrix values
4549 
4550    Output Parameter:
4551 .   mat - the matrix
4552 
4553    Level: advanced
4554 
4555    Notes:
4556        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4557 
4558        The i and j indices are 0 based
4559 
4560        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4561 
4562 
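   Example usage, as a minimal sketch: error checking is trimmed; the CSR arrays
   i, j, a (diagonal block) and oi, oj, oa (off-diagonal block) are illustrative
   and must already be filled for this process's m local rows.

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
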
4563 .keywords: matrix, aij, compressed row, sparse, parallel
4564 
4565 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4566           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4567 @*/
4568 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4569 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4570 {
4571   PetscErrorCode ierr;
4572   Mat_MPIAIJ     *maij;
4573 
4574   PetscFunctionBegin;
4575   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4576   if (i[0]) {
4577     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4578   }
4579   if (oi[0]) {
4580     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4581   }
4582   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4583   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4584   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4585   maij = (Mat_MPIAIJ*) (*mat)->data;
4586   maij->donotstash     = PETSC_TRUE;
4587   (*mat)->preallocated = PETSC_TRUE;
4588 
4589   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4590   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4591   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4592 
4593   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4594   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4595 
4596   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4597   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4598   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4599   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4600 
4601   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4602   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4603   PetscFunctionReturn(0);
4604 }
4605 
4606 /*
4607     Special version for direct calls from Fortran
4608 */
4609 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4610 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4611 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4612 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4613 #endif
4614 
4615 /* Change these macros so they can be used in the void Fortran-callable function below:
4615    CHKERRQ cannot return an error code from a void function, so errors abort on the
4615    matrix's communicator instead */
4616 #undef CHKERRQ
4617 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4618 #undef SETERRQ2
4619 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4620 #undef SETERRQ
4621 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4622 
4623 EXTERN_C_BEGIN
4624 #undef __FUNCT__
4625 #define __FUNCT__ "matsetvaluesmpiaij_"
4626 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4627 {
4628   Mat            mat = *mmat;
4629   PetscInt       m = *mm, n = *mn;
4630   InsertMode     addv = *maddv;
4631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4632   PetscScalar    value;
4633   PetscErrorCode ierr;
4634 
4635   MatPreallocated(mat);
4636   if (mat->insertmode == NOT_SET_VALUES) {
4637     mat->insertmode = addv;
4638   }
4639 #if defined(PETSC_USE_DEBUG)
4640   else if (mat->insertmode != addv) {
4641     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4642   }
4643 #endif
4644   {
4645   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4646   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4647   PetscTruth     roworiented = aij->roworiented;
4648 
4649   /* Some Variables required in the macro */
4650   Mat            A = aij->A;
4651   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4652   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4653   PetscScalar    *aa = a->a;
4654   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4655   Mat            B = aij->B;
4656   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4657   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4658   PetscScalar    *ba = b->a;
4659 
4660   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4661   PetscInt       nonew = a->nonew;
4662   PetscScalar    *ap1,*ap2;
4663 
4664   PetscFunctionBegin;
4665   for (i=0; i<m; i++) {
4666     if (im[i] < 0) continue;
4667 #if defined(PETSC_USE_DEBUG)
4668     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4669 #endif
4670     if (im[i] >= rstart && im[i] < rend) {
4671       row      = im[i] - rstart;
4672       lastcol1 = -1;
4673       rp1      = aj + ai[row];
4674       ap1      = aa + ai[row];
4675       rmax1    = aimax[row];
4676       nrow1    = ailen[row];
4677       low1     = 0;
4678       high1    = nrow1;
4679       lastcol2 = -1;
4680       rp2      = bj + bi[row];
4681       ap2      = ba + bi[row];
4682       rmax2    = bimax[row];
4683       nrow2    = bilen[row];
4684       low2     = 0;
4685       high2    = nrow2;
4686 
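      /* Entries whose global column lies in [cstart,cend) go into the diagonal block
         aij->A; all others go into the off-diagonal block aij->B, which may require
         disassembling a previously assembled matrix (DisAssemble_MPIAIJ below). */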
4687       for (j=0; j<n; j++) {
4688         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4689         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4690         if (in[j] >= cstart && in[j] < cend){
4691           col = in[j] - cstart;
4692           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4693         } else if (in[j] < 0) continue;
4694 #if defined(PETSC_USE_DEBUG)
4695         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4696 #endif
4697         else {
4698           if (mat->was_assembled) {
4699             if (!aij->colmap) {
4700               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4701             }
4702 #if defined (PETSC_USE_CTABLE)
4703             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4704 	    col--;
4705 #else
4706             col = aij->colmap[in[j]] - 1;
4707 #endif
4708             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4709               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4710               col =  in[j];
4711               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4712               B = aij->B;
4713               b = (Mat_SeqAIJ*)B->data;
4714               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4715               rp2      = bj + bi[row];
4716               ap2      = ba + bi[row];
4717               rmax2    = bimax[row];
4718               nrow2    = bilen[row];
4719               low2     = 0;
4720               high2    = nrow2;
4721               bm       = aij->B->rmap.n;
4722               ba = b->a;
4723             }
4724           } else col = in[j];
4725           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4726         }
4727       }
4728     } else {
4729       if (!aij->donotstash) {
4730         if (roworiented) {
4731           if (ignorezeroentries && v[i*n] == 0.0) continue;
4732           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4733         } else {
4734           if (ignorezeroentries && v[i] == 0.0) continue;
4735           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4736         }
4737       }
4738     }
4739   }}
4740   PetscFunctionReturnVoid();
4741 }
4742 EXTERN_C_END
4743 
4744