xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 1bb69edbc2f6efc8bcb8b8b24da8e7bcfef7c08b)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it it is not scalable (each processor
11 has an order N integer array but is fast to acess.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col --;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are either in INSERTMODE or ADDMODE */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col,other_disassembled;
319   PetscScalar    *val;
320   InsertMode     addv = mat->insertmode;
321 
322   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled, if so we must
346      also disassemble ourselfs, in order that we may reassemble. */
347   /*
348      if nonzero structure of submatrix B cannot change then we know that
349      no processor disassembled thus we can skip this stuff
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
359   }
360   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
361   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
364 
365   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
366   aij->rowvalues = 0;
367 
368   /* used by MatAXPY() */
369   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
371 
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal we use separate
502      code so that MatSetValues() is not called for each diagonal allocating
503      new memory, thus calling lots of mallocs and slowing things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added in yy until the next line, */
594     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
600     /* values actually were received in the Begin() but we need to call this nop */
601     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 EXTERN_C_BEGIN
607 #undef __FUNCT__
608 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610 {
611   MPI_Comm       comm;
612   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614   IS             Me,Notme;
615   PetscErrorCode ierr;
616   PetscInt       M,N,first,last,*notme,i;
617   PetscMPIInt    size;
618 
619   PetscFunctionBegin;
620 
621   /* Easy test: symmetric diagonal block */
622   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
624   if (!*f) PetscFunctionReturn(0);
625   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
627   if (size == 1) PetscFunctionReturn(0);
628 
629   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
631   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
632   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
633   for (i=0; i<first; i++) notme[i] = i;
634   for (i=last; i<M; i++) notme[i-last+first] = i;
635   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
636   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
637   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
638   Aoff = Aoffs[0];
639   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
640   Boff = Boffs[0];
641   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
642   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
644   ierr = ISDestroy(Me);CHKERRQ(ierr);
645   ierr = ISDestroy(Notme);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatDestroy_MPIAIJ"
705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706 {
707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711 #if defined(PETSC_USE_LOG)
712   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713 #endif
714   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
715   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
717 #if defined (PETSC_USE_CTABLE)
718   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
719 #else
720   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
721 #endif
722   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
723   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
724   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
725   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
726   ierr = PetscFree(aij);CHKERRQ(ierr);
727 
728   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
729   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatView_MPIAIJ_Binary"
741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
742 {
743   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
744   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
745   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
746   PetscErrorCode    ierr;
747   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
748   int               fd;
749   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
750   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
751   PetscScalar       *column_values;
752 
753   PetscFunctionBegin;
754   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
756   nz   = A->nz + B->nz;
757   if (!rank) {
758     header[0] = MAT_FILE_COOKIE;
759     header[1] = mat->rmap.N;
760     header[2] = mat->cmap.N;
761     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
762     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
763     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
764     /* get largest number of rows any processor has */
765     rlen = mat->rmap.n;
766     range = mat->rmap.range;
767     for (i=1; i<size; i++) {
768       rlen = PetscMax(rlen,range[i+1] - range[i]);
769     }
770   } else {
771     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     rlen = mat->rmap.n;
773   }
774 
775   /* load up the local row counts */
776   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
777   for (i=0; i<mat->rmap.n; i++) {
778     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
779   }
780 
781   /* store the row lengths to the file */
782   if (!rank) {
783     MPI_Status status;
784     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
785     for (i=1; i<size; i++) {
786       rlen = range[i+1] - range[i];
787       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
788       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     }
790   } else {
791     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
792   }
793   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
794 
795   /* load up the local column indices */
796   nzmax = nz; /* )th processor needs space a largest processor needs */
797   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
798   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
799   cnt  = 0;
800   for (i=0; i<mat->rmap.n; i++) {
801     for (j=B->i[i]; j<B->i[i+1]; j++) {
802       if ( (col = garray[B->j[j]]) > cstart) break;
803       column_indices[cnt++] = col;
804     }
805     for (k=A->i[i]; k<A->i[i+1]; k++) {
806       column_indices[cnt++] = A->j[k] + cstart;
807     }
808     for (; j<B->i[i+1]; j++) {
809       column_indices[cnt++] = garray[B->j[j]];
810     }
811   }
812   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
813 
814   /* store the column indices to the file */
815   if (!rank) {
816     MPI_Status status;
817     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     for (i=1; i<size; i++) {
819       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
820       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
821       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
823     }
824   } else {
825     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
826     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827   }
828   ierr = PetscFree(column_indices);CHKERRQ(ierr);
829 
830   /* load up the local column values */
831   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
832   cnt  = 0;
833   for (i=0; i<mat->rmap.n; i++) {
834     for (j=B->i[i]; j<B->i[i+1]; j++) {
835       if ( garray[B->j[j]] > cstart) break;
836       column_values[cnt++] = B->a[j];
837     }
838     for (k=A->i[i]; k<A->i[i+1]; k++) {
839       column_values[cnt++] = A->a[k];
840     }
841     for (; j<B->i[i+1]; j++) {
842       column_values[cnt++] = B->a[j];
843     }
844   }
845   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
846 
847   /* store the column values to the file */
848   if (!rank) {
849     MPI_Status status;
850     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
851     for (i=1; i<size; i++) {
852       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
853       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
854       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
856     }
857   } else {
858     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
859     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(column_values);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
868 {
869   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
870   PetscErrorCode    ierr;
871   PetscMPIInt       rank = aij->rank,size = aij->size;
872   PetscTruth        isdraw,iascii,isbinary;
873   PetscViewer       sviewer;
874   PetscViewerFormat format;
875 
876   PetscFunctionBegin;
877   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
880   if (iascii) {
881     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
882     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
883       MatInfo    info;
884       PetscTruth inodes;
885 
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
889       if (!inodes) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
891 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
894 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscInt   inodecount,inodelimit,*inodes;
905       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
906       if (inodes) {
907         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
908       } else {
909         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
910       }
911       PetscFunctionReturn(0);
912     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
913       PetscFunctionReturn(0);
914     }
915   } else if (isbinary) {
916     if (size == 1) {
917       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919     } else {
920       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
921     }
922     PetscFunctionReturn(0);
923   } else if (isdraw) {
924     PetscDraw  draw;
925     PetscTruth isnull;
926     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
927     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
928   }
929 
930   if (size == 1) {
931     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
932     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
933   } else {
934     /* assemble the entire matrix onto first processor. */
935     Mat         A;
936     Mat_SeqAIJ  *Aloc;
937     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
938     PetscScalar *a;
939 
940     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
941     if (!rank) {
942       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
943     } else {
944       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = mat->rmap.rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = mat->rmap.rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Everyone has to call to draw the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth     iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatRelax_MPIAIJ"
1014 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1015 {
1016   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1017   PetscErrorCode ierr;
1018   Vec            bb1;
1019 
1020   PetscFunctionBegin;
1021   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1022 
1023   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1024 
1025   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1026     if (flag & SOR_ZERO_INITIAL_GUESS) {
1027       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1028       its--;
1029     }
1030 
1031     while (its--) {
1032       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1033       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1034 
1035       /* update rhs: bb1 = bb - B*x */
1036       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1037       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1038 
1039       /* local sweep */
1040       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1041       CHKERRQ(ierr);
1042     }
1043   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1044     if (flag & SOR_ZERO_INITIAL_GUESS) {
1045       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1046       its--;
1047     }
1048     while (its--) {
1049       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1050       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1051 
1052       /* update rhs: bb1 = bb - B*x */
1053       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1054       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1055 
1056       /* local sweep */
1057       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1058       CHKERRQ(ierr);
1059     }
1060   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1061     if (flag & SOR_ZERO_INITIAL_GUESS) {
1062       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1063       its--;
1064     }
1065     while (its--) {
1066       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1067       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068 
1069       /* update rhs: bb1 = bb - B*x */
1070       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1071       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1072 
1073       /* local sweep */
1074       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1075       CHKERRQ(ierr);
1076     }
1077   } else {
1078     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1079   }
1080 
1081   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatPermute_MPIAIJ"
1087 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1088 {
1089   MPI_Comm       comm,pcomm;
1090   PetscInt       first,local_size,nrows,*rows;
1091   int            ntids;
1092   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1093   PetscErrorCode ierr;
1094 
1095   PetscFunctionBegin;
1096   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1097   /* make a collective version of 'rowp' */
1098   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1099   if (pcomm==comm) {
1100     crowp = rowp;
1101   } else {
1102     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1103     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1104     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1105     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1106   }
1107   /* collect the global row permutation and invert it */
1108   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1109   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1110   if (pcomm!=comm) {
1111     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1112   }
1113   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1114   /* get the local target indices */
1115   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1116   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1117   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1118   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1119   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1121   /* the column permutation is so much easier;
1122      make a local version of 'colp' and invert it */
1123   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1124   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1125   if (ntids==1) {
1126     lcolp = colp;
1127   } else {
1128     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1129     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1130     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1131   }
1132   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1133   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1134   if (ntids>1) {
1135     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1136     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1137   }
1138   /* now we just get the submatrix */
1139   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1140   /* clean up */
1141   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1142   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1148 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1149 {
1150   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1151   Mat            A = mat->A,B = mat->B;
1152   PetscErrorCode ierr;
1153   PetscReal      isend[5],irecv[5];
1154 
1155   PetscFunctionBegin;
1156   info->block_size     = 1.0;
1157   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1158   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1159   isend[3] = info->memory;  isend[4] = info->mallocs;
1160   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1161   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1162   isend[3] += info->memory;  isend[4] += info->mallocs;
1163   if (flag == MAT_LOCAL) {
1164     info->nz_used      = isend[0];
1165     info->nz_allocated = isend[1];
1166     info->nz_unneeded  = isend[2];
1167     info->memory       = isend[3];
1168     info->mallocs      = isend[4];
1169   } else if (flag == MAT_GLOBAL_MAX) {
1170     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1171     info->nz_used      = irecv[0];
1172     info->nz_allocated = irecv[1];
1173     info->nz_unneeded  = irecv[2];
1174     info->memory       = irecv[3];
1175     info->mallocs      = irecv[4];
1176   } else if (flag == MAT_GLOBAL_SUM) {
1177     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1178     info->nz_used      = irecv[0];
1179     info->nz_allocated = irecv[1];
1180     info->nz_unneeded  = irecv[2];
1181     info->memory       = irecv[3];
1182     info->mallocs      = irecv[4];
1183   }
1184   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1185   info->fill_ratio_needed = 0;
1186   info->factor_mallocs    = 0;
1187   info->rows_global       = (double)matin->rmap.N;
1188   info->columns_global    = (double)matin->cmap.N;
1189   info->rows_local        = (double)matin->rmap.n;
1190   info->columns_local     = (double)matin->cmap.N;
1191 
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatSetOption_MPIAIJ"
1197 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1198 {
1199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   switch (op) {
1204   case MAT_NO_NEW_NONZERO_LOCATIONS:
1205   case MAT_YES_NEW_NONZERO_LOCATIONS:
1206   case MAT_COLUMNS_UNSORTED:
1207   case MAT_COLUMNS_SORTED:
1208   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1209   case MAT_KEEP_ZEROED_ROWS:
1210   case MAT_NEW_NONZERO_LOCATION_ERR:
1211   case MAT_USE_INODES:
1212   case MAT_DO_NOT_USE_INODES:
1213   case MAT_IGNORE_ZERO_ENTRIES:
1214     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1215     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1216     break;
1217   case MAT_ROW_ORIENTED:
1218     a->roworiented = PETSC_TRUE;
1219     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1220     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1221     break;
1222   case MAT_ROWS_SORTED:
1223   case MAT_ROWS_UNSORTED:
1224   case MAT_YES_NEW_DIAGONALS:
1225     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1226     break;
1227   case MAT_COLUMN_ORIENTED:
1228     a->roworiented = PETSC_FALSE;
1229     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1230     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1231     break;
1232   case MAT_IGNORE_OFF_PROC_ENTRIES:
1233     a->donotstash = PETSC_TRUE;
1234     break;
1235   case MAT_NO_NEW_DIAGONALS:
1236     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1237   case MAT_SYMMETRIC:
1238     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1239     break;
1240   case MAT_STRUCTURALLY_SYMMETRIC:
1241   case MAT_HERMITIAN:
1242   case MAT_SYMMETRY_ETERNAL:
1243     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1244     break;
1245   case MAT_NOT_SYMMETRIC:
1246   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1247   case MAT_NOT_HERMITIAN:
1248   case MAT_NOT_SYMMETRY_ETERNAL:
1249     break;
1250   default:
1251     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1252   }
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatGetRow_MPIAIJ"
1258 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1259 {
1260   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1261   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1262   PetscErrorCode ierr;
1263   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1264   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1265   PetscInt       *cmap,*idx_p;
1266 
1267   PetscFunctionBegin;
1268   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1269   mat->getrowactive = PETSC_TRUE;
1270 
1271   if (!mat->rowvalues && (idx || v)) {
1272     /*
1273         allocate enough space to hold information from the longest row.
1274     */
1275     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1276     PetscInt     max = 1,tmp;
1277     for (i=0; i<matin->rmap.n; i++) {
1278       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1279       if (max < tmp) { max = tmp; }
1280     }
1281     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1282     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1283   }
1284 
1285   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1286   lrow = row - rstart;
1287 
1288   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1289   if (!v)   {pvA = 0; pvB = 0;}
1290   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1291   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1292   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1293   nztot = nzA + nzB;
1294 
1295   cmap  = mat->garray;
1296   if (v  || idx) {
1297     if (nztot) {
1298       /* Sort by increasing column numbers, assuming A and B already sorted */
1299       PetscInt imark = -1;
1300       if (v) {
1301         *v = v_p = mat->rowvalues;
1302         for (i=0; i<nzB; i++) {
1303           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1304           else break;
1305         }
1306         imark = i;
1307         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1308         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1309       }
1310       if (idx) {
1311         *idx = idx_p = mat->rowindices;
1312         if (imark > -1) {
1313           for (i=0; i<imark; i++) {
1314             idx_p[i] = cmap[cworkB[i]];
1315           }
1316         } else {
1317           for (i=0; i<nzB; i++) {
1318             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1319             else break;
1320           }
1321           imark = i;
1322         }
1323         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1324         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1325       }
1326     } else {
1327       if (idx) *idx = 0;
1328       if (v)   *v   = 0;
1329     }
1330   }
1331   *nz = nztot;
1332   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1333   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1339 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340 {
1341   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1342 
1343   PetscFunctionBegin;
1344   if (!aij->getrowactive) {
1345     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1346   }
1347   aij->getrowactive = PETSC_FALSE;
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatNorm_MPIAIJ"
1353 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1354 {
1355   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1356   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1357   PetscErrorCode ierr;
1358   PetscInt       i,j,cstart = mat->cmap.rstart;
1359   PetscReal      sum = 0.0;
1360   PetscScalar    *v;
1361 
1362   PetscFunctionBegin;
1363   if (aij->size == 1) {
1364     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1365   } else {
1366     if (type == NORM_FROBENIUS) {
1367       v = amat->a;
1368       for (i=0; i<amat->nz; i++) {
1369 #if defined(PETSC_USE_COMPLEX)
1370         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1371 #else
1372         sum += (*v)*(*v); v++;
1373 #endif
1374       }
1375       v = bmat->a;
1376       for (i=0; i<bmat->nz; i++) {
1377 #if defined(PETSC_USE_COMPLEX)
1378         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1379 #else
1380         sum += (*v)*(*v); v++;
1381 #endif
1382       }
1383       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1384       *norm = sqrt(*norm);
1385     } else if (type == NORM_1) { /* max column norm */
1386       PetscReal *tmp,*tmp2;
1387       PetscInt    *jj,*garray = aij->garray;
1388       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1389       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1390       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1391       *norm = 0.0;
1392       v = amat->a; jj = amat->j;
1393       for (j=0; j<amat->nz; j++) {
1394         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1395       }
1396       v = bmat->a; jj = bmat->j;
1397       for (j=0; j<bmat->nz; j++) {
1398         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1399       }
1400       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1401       for (j=0; j<mat->cmap.N; j++) {
1402         if (tmp2[j] > *norm) *norm = tmp2[j];
1403       }
1404       ierr = PetscFree(tmp);CHKERRQ(ierr);
1405       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1406     } else if (type == NORM_INFINITY) { /* max row norm */
1407       PetscReal ntemp = 0.0;
1408       for (j=0; j<aij->A->rmap.n; j++) {
1409         v = amat->a + amat->i[j];
1410         sum = 0.0;
1411         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1412           sum += PetscAbsScalar(*v); v++;
1413         }
1414         v = bmat->a + bmat->i[j];
1415         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1416           sum += PetscAbsScalar(*v); v++;
1417         }
1418         if (sum > ntemp) ntemp = sum;
1419       }
1420       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1421     } else {
1422       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1423     }
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatTranspose_MPIAIJ"
1430 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1431 {
1432   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1433   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1434   PetscErrorCode ierr;
1435   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1436   Mat            B;
1437   PetscScalar    *array;
1438 
1439   PetscFunctionBegin;
1440   if (!matout && M != N) {
1441     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1442   }
1443 
1444   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1445   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1446   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1447   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1448 
1449   /* copy over the A part */
1450   Aloc = (Mat_SeqAIJ*)a->A->data;
1451   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1452   row = A->rmap.rstart;
1453   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1454   for (i=0; i<m; i++) {
1455     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1456     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1457   }
1458   aj = Aloc->j;
1459   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1460 
1461   /* copy over the B part */
1462   Aloc = (Mat_SeqAIJ*)a->B->data;
1463   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1464   row  = A->rmap.rstart;
1465   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1466   ct   = cols;
1467   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1468   for (i=0; i<m; i++) {
1469     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1470     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1471   }
1472   ierr = PetscFree(ct);CHKERRQ(ierr);
1473   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1474   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1475   if (matout) {
1476     *matout = B;
1477   } else {
1478     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1479   }
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1485 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1486 {
1487   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1488   Mat            a = aij->A,b = aij->B;
1489   PetscErrorCode ierr;
1490   PetscInt       s1,s2,s3;
1491 
1492   PetscFunctionBegin;
1493   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1494   if (rr) {
1495     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1496     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1497     /* Overlap communication with computation. */
1498     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1499   }
1500   if (ll) {
1501     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1502     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1503     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1504   }
1505   /* scale  the diagonal block */
1506   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1507 
1508   if (rr) {
1509     /* Do a scatter end and then right scale the off-diagonal block */
1510     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1511     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1512   }
1513 
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1519 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1520 {
1521   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1526   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1531 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1532 {
1533   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatEqual_MPIAIJ"
1543 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1544 {
1545   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1546   Mat            a,b,c,d;
1547   PetscTruth     flg;
1548   PetscErrorCode ierr;
1549 
1550   PetscFunctionBegin;
1551   a = matA->A; b = matA->B;
1552   c = matB->A; d = matB->B;
1553 
1554   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1555   if (flg) {
1556     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1557   }
1558   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatCopy_MPIAIJ"
1564 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1565 {
1566   PetscErrorCode ierr;
1567   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1568   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1569 
1570   PetscFunctionBegin;
1571   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1572   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1573     /* because of the column compression in the off-processor part of the matrix a->B,
1574        the number of columns in a->B and b->B may be different, hence we cannot call
1575        the MatCopy() directly on the two parts. If need be, we can provide a more
1576        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1577        then copying the submatrices */
1578     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1579   } else {
1580     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1581     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1582   }
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1588 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1589 {
1590   PetscErrorCode ierr;
1591 
1592   PetscFunctionBegin;
1593   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #include "petscblaslapack.h"
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatAXPY_MPIAIJ"
1600 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1601 {
1602   PetscErrorCode ierr;
1603   PetscInt       i;
1604   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1605   PetscBLASInt   bnz,one=1;
1606   Mat_SeqAIJ     *x,*y;
1607 
1608   PetscFunctionBegin;
1609   if (str == SAME_NONZERO_PATTERN) {
1610     PetscScalar alpha = a;
1611     x = (Mat_SeqAIJ *)xx->A->data;
1612     y = (Mat_SeqAIJ *)yy->A->data;
1613     bnz = (PetscBLASInt)x->nz;
1614     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1615     x = (Mat_SeqAIJ *)xx->B->data;
1616     y = (Mat_SeqAIJ *)yy->B->data;
1617     bnz = (PetscBLASInt)x->nz;
1618     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1619   } else if (str == SUBSET_NONZERO_PATTERN) {
1620     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1621 
1622     x = (Mat_SeqAIJ *)xx->B->data;
1623     y = (Mat_SeqAIJ *)yy->B->data;
1624     if (y->xtoy && y->XtoY != xx->B) {
1625       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1626       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1627     }
1628     if (!y->xtoy) { /* get xtoy */
1629       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1630       y->XtoY = xx->B;
1631     }
1632     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1633   } else {
1634     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1635   }
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1640 
1641 #undef __FUNCT__
1642 #define __FUNCT__ "MatConjugate_MPIAIJ"
1643 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1644 {
1645 #if defined(PETSC_USE_COMPLEX)
1646   PetscErrorCode ierr;
1647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1648 
1649   PetscFunctionBegin;
1650   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1651   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1652 #else
1653   PetscFunctionBegin;
1654 #endif
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 #undef __FUNCT__
1659 #define __FUNCT__ "MatRealPart_MPIAIJ"
1660 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1661 {
1662   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1663   PetscErrorCode ierr;
1664 
1665   PetscFunctionBegin;
1666   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1667   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1673 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1674 {
1675   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1676   PetscErrorCode ierr;
1677 
1678   PetscFunctionBegin;
1679   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1680   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #ifdef PETSC_HAVE_PBGL
1685 
1686 #include <boost/parallel/mpi/bsp_process_group.hpp>
1687 #include <boost/graph/distributed/ilu_default_graph.hpp>
1688 #include <boost/graph/distributed/ilu_0_block.hpp>
1689 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1690 #include <boost/graph/distributed/petsc/interface.hpp>
1691 #include <boost/multi_array.hpp>
1692 #include <boost/parallel/distributed_property_map.hpp>
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1696 /*
1697   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1698 */
1699 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1700 {
1701   namespace petsc = boost::distributed::petsc;
1702 
1703   namespace graph_dist = boost::graph::distributed;
1704   using boost::graph::distributed::ilu_default::process_group_type;
1705   using boost::graph::ilu_permuted;
1706 
1707   PetscTruth      row_identity, col_identity;
1708   PetscContainer  c;
1709   PetscInt        m, n, M, N;
1710   PetscErrorCode  ierr;
1711 
1712   PetscFunctionBegin;
1713   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1714   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1715   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1716   if (!row_identity || !col_identity) {
1717     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1718   }
1719 
1720   process_group_type pg;
1721   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1722   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1723   lgraph_type&   level_graph = *lgraph_p;
1724   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1725 
1726   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1727   ilu_permuted(level_graph);
1728 
1729   /* put together the new matrix */
1730   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1731   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1732   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1733   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1734   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1735   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1736   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1737   (*fact)->factor = FACTOR_LU;
1738 
1739   ierr = PetscContainerCreate(A->comm, &c);
1740   ierr = PetscContainerSetPointer(c, lgraph_p);
1741   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1747 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1748 {
1749   PetscFunctionBegin;
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatSolve_MPIAIJ"
1755 /*
1756   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1757 */
1758 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1759 {
1760   namespace graph_dist = boost::graph::distributed;
1761 
1762   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1763   lgraph_type*   lgraph_p;
1764   PetscContainer c;
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1769   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1770   ierr = VecCopy(b, x); CHKERRQ(ierr);
1771 
1772   PetscScalar* array_x;
1773   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1774   PetscInt sx;
1775   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1776 
1777   PetscScalar* array_b;
1778   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1779   PetscInt sb;
1780   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1781 
1782   lgraph_type&   level_graph = *lgraph_p;
1783   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1784 
1785   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1786   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1787                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1788 
1789   typedef boost::iterator_property_map<array_ref_type::iterator,
1790                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1791   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1792                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1793 
1794   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1795 
1796   PetscFunctionReturn(0);
1797 }
1798 #endif
1799 
1800 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1801   PetscInt       nzlocal,nsends,nrecvs;
1802   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
1803   PetscScalar    *sbuf_a,**rbuf_a;
1804   PetscErrorCode (*MatDestroy)(Mat);
1805 } Mat_Redundant;
1806 
1807 #undef __FUNCT__
1808 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1809 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1810 {
1811   PetscErrorCode       ierr;
1812   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1813   PetscInt             i;
1814 
1815   PetscFunctionBegin;
1816   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1817   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1818   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1819   for (i=0; i<redund->nrecvs; i++){
1820     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1821     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1822   }
1823   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1824   ierr = PetscFree(redund);CHKERRQ(ierr);
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatDestroy_MatRedundant"
1830 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1831 {
1832   PetscErrorCode  ierr;
1833   PetscContainer  container;
1834   Mat_Redundant   *redund=PETSC_NULL;
1835 
1836   PetscFunctionBegin;
1837   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1838   if (container) {
1839     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1840   } else {
1841     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1842   }
1843   A->ops->destroy = redund->MatDestroy;
1844   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1845   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1846   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1852 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1853 {
1854   PetscMPIInt    rank,size;
1855   MPI_Comm       comm=mat->comm;
1856   PetscErrorCode ierr;
1857   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1858   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1859   PetscInt       *rowrange=mat->rmap.range;
1860   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1861   Mat            A=aij->A,B=aij->B,C=*matredundant;
1862   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1863   PetscScalar    *sbuf_a;
1864   PetscInt       nzlocal=a->nz+b->nz;
1865   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1866   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1867   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1868   PetscScalar    *vals,*aworkA,*aworkB;
1869   PetscMPIInt    tag1,tag2,tag3,imdex;
1870   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1871                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1872   MPI_Status     recv_status,*send_status;
1873   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1874   PetscInt       **rbuf_j=PETSC_NULL;
1875   PetscScalar    **rbuf_a=PETSC_NULL;
1876   Mat_Redundant  *redund=PETSC_NULL;
1877   PetscContainer container;
1878 
1879   PetscFunctionBegin;
1880   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1881   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1882 
1883   if (reuse == MAT_REUSE_MATRIX) {
1884     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1885     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1886     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1887     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1888     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1889     if (container) {
1890       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1891     } else {
1892       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1893     }
1894     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1895 
1896     nsends    = redund->nsends;
1897     nrecvs    = redund->nrecvs;
1898     send_rank = redund->send_rank; recv_rank = send_rank + size;
1899     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1900     sbuf_j    = redund->sbuf_j;
1901     sbuf_a    = redund->sbuf_a;
1902     rbuf_j    = redund->rbuf_j;
1903     rbuf_a    = redund->rbuf_a;
1904   }
1905 
1906   if (reuse == MAT_INITIAL_MATRIX){
1907     PetscMPIInt  subrank,subsize;
1908     PetscInt     nleftover,np_subcomm;
1909     /* get the destination processors' id send_rank, nsends and nrecvs */
1910     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1911     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1912     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1913     recv_rank = send_rank + size;
1914     np_subcomm = size/nsubcomm;
1915     nleftover  = size - nsubcomm*np_subcomm;
1916     nsends = 0; nrecvs = 0;
1917     for (i=0; i<size; i++){ /* i=rank*/
1918       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1919         send_rank[nsends] = i; nsends++;
1920         recv_rank[nrecvs++] = i;
1921       }
1922     }
1923     if (rank >= size - nleftover){/* this proc is a leftover processor */
1924       i = size-nleftover-1;
1925       j = 0;
1926       while (j < nsubcomm - nleftover){
1927         send_rank[nsends++] = i;
1928         i--; j++;
1929       }
1930     }
1931 
1932     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1933       for (i=0; i<nleftover; i++){
1934         recv_rank[nrecvs++] = size-nleftover+i;
1935       }
1936     }
1937 
1938     /* allocate sbuf_j, sbuf_a */
1939     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1940     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1941     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1942   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1943 
1944   /* copy mat's local entries into the buffers */
1945   if (reuse == MAT_INITIAL_MATRIX){
1946     rownz_max = 0;
1947     rptr = sbuf_j;
1948     cols = sbuf_j + rend-rstart + 1;
1949     vals = sbuf_a;
1950     rptr[0] = 0;
1951     for (i=0; i<rend-rstart; i++){
1952       row = i + rstart;
1953       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1954       ncols  = nzA + nzB;
1955       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1956       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1957       /* load the column indices for this row into cols */
1958       lwrite = 0;
1959       for (l=0; l<nzB; l++) {
1960         if ((ctmp = bmap[cworkB[l]]) < cstart){
1961           vals[lwrite]   = aworkB[l];
1962           cols[lwrite++] = ctmp;
1963         }
1964       }
1965       for (l=0; l<nzA; l++){
1966         vals[lwrite]   = aworkA[l];
1967         cols[lwrite++] = cstart + cworkA[l];
1968       }
1969       for (l=0; l<nzB; l++) {
1970         if ((ctmp = bmap[cworkB[l]]) >= cend){
1971           vals[lwrite]   = aworkB[l];
1972           cols[lwrite++] = ctmp;
1973         }
1974       }
1975       vals += ncols;
1976       cols += ncols;
1977       rptr[i+1] = rptr[i] + ncols;
1978       if (rownz_max < ncols) rownz_max = ncols;
1979     }
1980     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
1981   } else { /* only copy matrix values into sbuf_a */
1982     rptr = sbuf_j;
1983     vals = sbuf_a;
1984     rptr[0] = 0;
1985     for (i=0; i<rend-rstart; i++){
1986       row = i + rstart;
1987       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1988       ncols  = nzA + nzB;
1989       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1990       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1991       lwrite = 0;
1992       for (l=0; l<nzB; l++) {
1993         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1994       }
1995       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1996       for (l=0; l<nzB; l++) {
1997         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1998       }
1999       vals += ncols;
2000       rptr[i+1] = rptr[i] + ncols;
2001     }
2002   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2003 
2004   /* send nzlocal to others, and recv other's nzlocal */
2005   /*--------------------------------------------------*/
2006   if (reuse == MAT_INITIAL_MATRIX){
2007     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2008     s_waits2 = s_waits3 + nsends;
2009     s_waits1 = s_waits2 + nsends;
2010     r_waits1 = s_waits1 + nsends;
2011     r_waits2 = r_waits1 + nrecvs;
2012     r_waits3 = r_waits2 + nrecvs;
2013   } else {
2014     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2015     r_waits3 = s_waits3 + nsends;
2016   }
2017 
2018   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2019   if (reuse == MAT_INITIAL_MATRIX){
2020     /* get new tags to keep the communication clean */
2021     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2022     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2023     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2024     rbuf_nz = sbuf_nz + nsends;
2025 
2026     /* post receives of other's nzlocal */
2027     for (i=0; i<nrecvs; i++){
2028       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2029     }
2030     /* send nzlocal to others */
2031     for (i=0; i<nsends; i++){
2032       sbuf_nz[i] = nzlocal;
2033       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2034     }
2035     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2036     count = nrecvs;
2037     while (count) {
2038       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2039       recv_rank[imdex] = recv_status.MPI_SOURCE;
2040       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2041       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2042 
2043       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2044       rbuf_nz[imdex] += i + 2;
2045       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2046       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2047       count--;
2048     }
2049     /* wait on sends of nzlocal */
2050     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2051     /* send mat->i,j to others, and recv from other's */
2052     /*------------------------------------------------*/
2053     for (i=0; i<nsends; i++){
2054       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2055       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2056     }
2057     /* wait on receives of mat->i,j */
2058     /*------------------------------*/
2059     count = nrecvs;
2060     while (count) {
2061       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2062       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2063       count--;
2064     }
2065     /* wait on sends of mat->i,j */
2066     /*---------------------------*/
2067     if (nsends) {
2068       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2069     }
2070   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2071 
2072   /* post receives, send and receive mat->a */
2073   /*----------------------------------------*/
2074   for (imdex=0; imdex<nrecvs; imdex++) {
2075     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2076   }
2077   for (i=0; i<nsends; i++){
2078     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2079   }
2080   count = nrecvs;
2081   while (count) {
2082     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2083     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2084     count--;
2085   }
2086   if (nsends) {
2087     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2088   }
2089 
2090   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2091 
2092   /* create redundant matrix */
2093   /*-------------------------*/
2094   if (reuse == MAT_INITIAL_MATRIX){
2095     /* compute rownz_max for preallocation */
2096     for (imdex=0; imdex<nrecvs; imdex++){
2097       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2098       rptr = rbuf_j[imdex];
2099       for (i=0; i<j; i++){
2100         ncols = rptr[i+1] - rptr[i];
2101         if (rownz_max < ncols) rownz_max = ncols;
2102       }
2103     }
2104 
2105     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2106     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2107     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2108     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2109     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2110   } else {
2111     C = *matredundant;
2112   }
2113 
2114   /* insert local matrix entries */
2115   rptr = sbuf_j;
2116   cols = sbuf_j + rend-rstart + 1;
2117   vals = sbuf_a;
2118   for (i=0; i<rend-rstart; i++){
2119     row   = i + rstart;
2120     ncols = rptr[i+1] - rptr[i];
2121     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2122     vals += ncols;
2123     cols += ncols;
2124   }
2125   /* insert received matrix entries */
2126   for (imdex=0; imdex<nrecvs; imdex++){
2127     rstart = rowrange[recv_rank[imdex]];
2128     rend   = rowrange[recv_rank[imdex]+1];
2129     rptr = rbuf_j[imdex];
2130     cols = rbuf_j[imdex] + rend-rstart + 1;
2131     vals = rbuf_a[imdex];
2132     for (i=0; i<rend-rstart; i++){
2133       row   = i + rstart;
2134       ncols = rptr[i+1] - rptr[i];
2135       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2136       vals += ncols;
2137       cols += ncols;
2138     }
2139   }
2140   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2141   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2142   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2143   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2144   if (reuse == MAT_INITIAL_MATRIX){
2145     PetscContainer container;
2146     *matredundant = C;
2147     /* create a supporting struct and attach it to C for reuse */
2148     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
2149     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2150     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2151     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2152     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2153 
2154     redund->nzlocal = nzlocal;
2155     redund->nsends  = nsends;
2156     redund->nrecvs  = nrecvs;
2157     redund->send_rank = send_rank;
2158     redund->sbuf_nz = sbuf_nz;
2159     redund->sbuf_j  = sbuf_j;
2160     redund->sbuf_a  = sbuf_a;
2161     redund->rbuf_j  = rbuf_j;
2162     redund->rbuf_a  = rbuf_a;
2163 
2164     redund->MatDestroy = C->ops->destroy;
2165     C->ops->destroy    = MatDestroy_MatRedundant;
2166   }
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 /* -------------------------------------------------------------------*/
2171 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2172        MatGetRow_MPIAIJ,
2173        MatRestoreRow_MPIAIJ,
2174        MatMult_MPIAIJ,
2175 /* 4*/ MatMultAdd_MPIAIJ,
2176        MatMultTranspose_MPIAIJ,
2177        MatMultTransposeAdd_MPIAIJ,
2178 #ifdef PETSC_HAVE_PBGL
2179        MatSolve_MPIAIJ,
2180 #else
2181        0,
2182 #endif
2183        0,
2184        0,
2185 /*10*/ 0,
2186        0,
2187        0,
2188        MatRelax_MPIAIJ,
2189        MatTranspose_MPIAIJ,
2190 /*15*/ MatGetInfo_MPIAIJ,
2191        MatEqual_MPIAIJ,
2192        MatGetDiagonal_MPIAIJ,
2193        MatDiagonalScale_MPIAIJ,
2194        MatNorm_MPIAIJ,
2195 /*20*/ MatAssemblyBegin_MPIAIJ,
2196        MatAssemblyEnd_MPIAIJ,
2197        0,
2198        MatSetOption_MPIAIJ,
2199        MatZeroEntries_MPIAIJ,
2200 /*25*/ MatZeroRows_MPIAIJ,
2201        0,
2202 #ifdef PETSC_HAVE_PBGL
2203        MatLUFactorNumeric_MPIAIJ,
2204 #else
2205        0,
2206 #endif
2207        0,
2208        0,
2209 /*30*/ MatSetUpPreallocation_MPIAIJ,
2210 #ifdef PETSC_HAVE_PBGL
2211        MatILUFactorSymbolic_MPIAIJ,
2212 #else
2213        0,
2214 #endif
2215        0,
2216        0,
2217        0,
2218 /*35*/ MatDuplicate_MPIAIJ,
2219        0,
2220        0,
2221        0,
2222        0,
2223 /*40*/ MatAXPY_MPIAIJ,
2224        MatGetSubMatrices_MPIAIJ,
2225        MatIncreaseOverlap_MPIAIJ,
2226        MatGetValues_MPIAIJ,
2227        MatCopy_MPIAIJ,
2228 /*45*/ 0,
2229        MatScale_MPIAIJ,
2230        0,
2231        0,
2232        0,
2233 /*50*/ MatSetBlockSize_MPIAIJ,
2234        0,
2235        0,
2236        0,
2237        0,
2238 /*55*/ MatFDColoringCreate_MPIAIJ,
2239        0,
2240        MatSetUnfactored_MPIAIJ,
2241        MatPermute_MPIAIJ,
2242        0,
2243 /*60*/ MatGetSubMatrix_MPIAIJ,
2244        MatDestroy_MPIAIJ,
2245        MatView_MPIAIJ,
2246        0,
2247        0,
2248 /*65*/ 0,
2249        0,
2250        0,
2251        0,
2252        0,
2253 /*70*/ 0,
2254        0,
2255        MatSetColoring_MPIAIJ,
2256 #if defined(PETSC_HAVE_ADIC)
2257        MatSetValuesAdic_MPIAIJ,
2258 #else
2259        0,
2260 #endif
2261        MatSetValuesAdifor_MPIAIJ,
2262 /*75*/ 0,
2263        0,
2264        0,
2265        0,
2266        0,
2267 /*80*/ 0,
2268        0,
2269        0,
2270        0,
2271 /*84*/ MatLoad_MPIAIJ,
2272        0,
2273        0,
2274        0,
2275        0,
2276        0,
2277 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2278        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2279        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2280        MatPtAP_Basic,
2281        MatPtAPSymbolic_MPIAIJ,
2282 /*95*/ MatPtAPNumeric_MPIAIJ,
2283        0,
2284        0,
2285        0,
2286        0,
2287 /*100*/0,
2288        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2289        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2290        MatConjugate_MPIAIJ,
2291        0,
2292 /*105*/MatSetValuesRow_MPIAIJ,
2293        MatRealPart_MPIAIJ,
2294        MatImaginaryPart_MPIAIJ,
2295        0,
2296        0,
2297 /*110*/0,
2298        MatGetRedundantMatrix_MPIAIJ};
2299 
2300 /* ----------------------------------------------------------------------------------------*/
2301 
2302 EXTERN_C_BEGIN
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2305 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2306 {
2307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2308   PetscErrorCode ierr;
2309 
2310   PetscFunctionBegin;
2311   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2312   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2313   PetscFunctionReturn(0);
2314 }
2315 EXTERN_C_END
2316 
2317 EXTERN_C_BEGIN
2318 #undef __FUNCT__
2319 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2320 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2321 {
2322   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2323   PetscErrorCode ierr;
2324 
2325   PetscFunctionBegin;
2326   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2327   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2328   PetscFunctionReturn(0);
2329 }
2330 EXTERN_C_END
2331 
2332 #include "petscpc.h"
2333 EXTERN_C_BEGIN
2334 #undef __FUNCT__
2335 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2336 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2337 {
2338   Mat_MPIAIJ     *b;
2339   PetscErrorCode ierr;
2340   PetscInt       i;
2341 
2342   PetscFunctionBegin;
2343   B->preallocated = PETSC_TRUE;
2344   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2345   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2346   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2347   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2348 
2349   B->rmap.bs = B->cmap.bs = 1;
2350   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2351   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2352   if (d_nnz) {
2353     for (i=0; i<B->rmap.n; i++) {
2354       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2355     }
2356   }
2357   if (o_nnz) {
2358     for (i=0; i<B->rmap.n; i++) {
2359       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2360     }
2361   }
2362   b = (Mat_MPIAIJ*)B->data;
2363 
2364   /* Explicitly create 2 MATSEQAIJ matrices. */
2365   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2366   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2367   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2368   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2369   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2370   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2371   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2372   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2373 
2374   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2375   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2376 
2377   PetscFunctionReturn(0);
2378 }
2379 EXTERN_C_END
2380 
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2383 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2384 {
2385   Mat            mat;
2386   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2387   PetscErrorCode ierr;
2388 
2389   PetscFunctionBegin;
2390   *newmat       = 0;
2391   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2392   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2393   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2394   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2395   a    = (Mat_MPIAIJ*)mat->data;
2396 
2397   mat->factor       = matin->factor;
2398   mat->rmap.bs      = matin->rmap.bs;
2399   mat->assembled    = PETSC_TRUE;
2400   mat->insertmode   = NOT_SET_VALUES;
2401   mat->preallocated = PETSC_TRUE;
2402 
2403   a->size           = oldmat->size;
2404   a->rank           = oldmat->rank;
2405   a->donotstash     = oldmat->donotstash;
2406   a->roworiented    = oldmat->roworiented;
2407   a->rowindices     = 0;
2408   a->rowvalues      = 0;
2409   a->getrowactive   = PETSC_FALSE;
2410 
2411   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2412   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2413 
2414   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2415   if (oldmat->colmap) {
2416 #if defined (PETSC_USE_CTABLE)
2417     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2418 #else
2419     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2420     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2421     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2422 #endif
2423   } else a->colmap = 0;
2424   if (oldmat->garray) {
2425     PetscInt len;
2426     len  = oldmat->B->cmap.n;
2427     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2428     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2429     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2430   } else a->garray = 0;
2431 
2432   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2433   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2434   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2435   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2436   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2437   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2438   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2439   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2440   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2441   *newmat = mat;
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 #include "petscsys.h"
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatLoad_MPIAIJ"
2449 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2450 {
2451   Mat            A;
2452   PetscScalar    *vals,*svals;
2453   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2454   MPI_Status     status;
2455   PetscErrorCode ierr;
2456   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2457   PetscInt       i,nz,j,rstart,rend,mmax;
2458   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2459   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2460   PetscInt       cend,cstart,n,*rowners;
2461   int            fd;
2462 
2463   PetscFunctionBegin;
2464   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2465   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2466   if (!rank) {
2467     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2468     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2469     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2470   }
2471 
2472   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2473   M = header[1]; N = header[2];
2474   /* determine ownership of all rows */
2475   m    = M/size + ((M % size) > rank);
2476   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2477   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2478 
2479   /* First process needs enough room for process with most rows */
2480   if (!rank) {
2481     mmax       = rowners[1];
2482     for (i=2; i<size; i++) {
2483       mmax = PetscMax(mmax,rowners[i]);
2484     }
2485   } else mmax = m;
2486 
2487   rowners[0] = 0;
2488   for (i=2; i<=size; i++) {
2489     rowners[i] += rowners[i-1];
2490   }
2491   rstart = rowners[rank];
2492   rend   = rowners[rank+1];
2493 
2494   /* distribute row lengths to all processors */
2495   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2496   if (!rank) {
2497     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2498     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2499     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2500     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2501     for (j=0; j<m; j++) {
2502       procsnz[0] += ourlens[j];
2503     }
2504     for (i=1; i<size; i++) {
2505       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2506       /* calculate the number of nonzeros on each processor */
2507       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2508         procsnz[i] += rowlengths[j];
2509       }
2510       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2511     }
2512     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2513   } else {
2514     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2515   }
2516 
2517   if (!rank) {
2518     /* determine max buffer needed and allocate it */
2519     maxnz = 0;
2520     for (i=0; i<size; i++) {
2521       maxnz = PetscMax(maxnz,procsnz[i]);
2522     }
2523     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2524 
2525     /* read in my part of the matrix column indices  */
2526     nz   = procsnz[0];
2527     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2528     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2529 
2530     /* read in every one elses and ship off */
2531     for (i=1; i<size; i++) {
2532       nz   = procsnz[i];
2533       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2534       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2535     }
2536     ierr = PetscFree(cols);CHKERRQ(ierr);
2537   } else {
2538     /* determine buffer space needed for message */
2539     nz = 0;
2540     for (i=0; i<m; i++) {
2541       nz += ourlens[i];
2542     }
2543     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2544 
2545     /* receive message of column indices*/
2546     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2547     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2548     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2549   }
2550 
2551   /* determine column ownership if matrix is not square */
2552   if (N != M) {
2553     n      = N/size + ((N % size) > rank);
2554     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2555     cstart = cend - n;
2556   } else {
2557     cstart = rstart;
2558     cend   = rend;
2559     n      = cend - cstart;
2560   }
2561 
2562   /* loop over local rows, determining number of off diagonal entries */
2563   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2564   jj = 0;
2565   for (i=0; i<m; i++) {
2566     for (j=0; j<ourlens[i]; j++) {
2567       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2568       jj++;
2569     }
2570   }
2571 
2572   /* create our matrix */
2573   for (i=0; i<m; i++) {
2574     ourlens[i] -= offlens[i];
2575   }
2576   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2577   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2578   ierr = MatSetType(A,type);CHKERRQ(ierr);
2579   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2580 
2581   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2582   for (i=0; i<m; i++) {
2583     ourlens[i] += offlens[i];
2584   }
2585 
2586   if (!rank) {
2587     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2588 
2589     /* read in my part of the matrix numerical values  */
2590     nz   = procsnz[0];
2591     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2592 
2593     /* insert into matrix */
2594     jj      = rstart;
2595     smycols = mycols;
2596     svals   = vals;
2597     for (i=0; i<m; i++) {
2598       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2599       smycols += ourlens[i];
2600       svals   += ourlens[i];
2601       jj++;
2602     }
2603 
2604     /* read in other processors and ship out */
2605     for (i=1; i<size; i++) {
2606       nz   = procsnz[i];
2607       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2608       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2609     }
2610     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2611   } else {
2612     /* receive numeric values */
2613     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2614 
2615     /* receive message of values*/
2616     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2617     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2618     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2619 
2620     /* insert into matrix */
2621     jj      = rstart;
2622     smycols = mycols;
2623     svals   = vals;
2624     for (i=0; i<m; i++) {
2625       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2626       smycols += ourlens[i];
2627       svals   += ourlens[i];
2628       jj++;
2629     }
2630   }
2631   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2632   ierr = PetscFree(vals);CHKERRQ(ierr);
2633   ierr = PetscFree(mycols);CHKERRQ(ierr);
2634   ierr = PetscFree(rowners);CHKERRQ(ierr);
2635 
2636   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2637   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2638   *newmat = A;
2639   PetscFunctionReturn(0);
2640 }
2641 
2642 #undef __FUNCT__
2643 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2644 /*
2645     Not great since it makes two copies of the submatrix, first an SeqAIJ
2646   in local and then by concatenating the local matrices the end result.
2647   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2648 */
2649 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2650 {
2651   PetscErrorCode ierr;
2652   PetscMPIInt    rank,size;
2653   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2654   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2655   Mat            *local,M,Mreuse;
2656   PetscScalar    *vwork,*aa;
2657   MPI_Comm       comm = mat->comm;
2658   Mat_SeqAIJ     *aij;
2659 
2660 
2661   PetscFunctionBegin;
2662   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2663   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2664 
2665   if (call ==  MAT_REUSE_MATRIX) {
2666     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2667     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2668     local = &Mreuse;
2669     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2670   } else {
2671     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2672     Mreuse = *local;
2673     ierr   = PetscFree(local);CHKERRQ(ierr);
2674   }
2675 
2676   /*
2677       m - number of local rows
2678       n - number of columns (same on all processors)
2679       rstart - first row in new global matrix generated
2680   */
2681   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2682   if (call == MAT_INITIAL_MATRIX) {
2683     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2684     ii  = aij->i;
2685     jj  = aij->j;
2686 
2687     /*
2688         Determine the number of non-zeros in the diagonal and off-diagonal
2689         portions of the matrix in order to do correct preallocation
2690     */
2691 
2692     /* first get start and end of "diagonal" columns */
2693     if (csize == PETSC_DECIDE) {
2694       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2695       if (mglobal == n) { /* square matrix */
2696 	nlocal = m;
2697       } else {
2698         nlocal = n/size + ((n % size) > rank);
2699       }
2700     } else {
2701       nlocal = csize;
2702     }
2703     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2704     rstart = rend - nlocal;
2705     if (rank == size - 1 && rend != n) {
2706       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2707     }
2708 
2709     /* next, compute all the lengths */
2710     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2711     olens = dlens + m;
2712     for (i=0; i<m; i++) {
2713       jend = ii[i+1] - ii[i];
2714       olen = 0;
2715       dlen = 0;
2716       for (j=0; j<jend; j++) {
2717         if (*jj < rstart || *jj >= rend) olen++;
2718         else dlen++;
2719         jj++;
2720       }
2721       olens[i] = olen;
2722       dlens[i] = dlen;
2723     }
2724     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2725     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2726     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2727     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2728     ierr = PetscFree(dlens);CHKERRQ(ierr);
2729   } else {
2730     PetscInt ml,nl;
2731 
2732     M = *newmat;
2733     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2734     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2735     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2736     /*
2737          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2738        rather than the slower MatSetValues().
2739     */
2740     M->was_assembled = PETSC_TRUE;
2741     M->assembled     = PETSC_FALSE;
2742   }
2743   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2744   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2745   ii  = aij->i;
2746   jj  = aij->j;
2747   aa  = aij->a;
2748   for (i=0; i<m; i++) {
2749     row   = rstart + i;
2750     nz    = ii[i+1] - ii[i];
2751     cwork = jj;     jj += nz;
2752     vwork = aa;     aa += nz;
2753     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2754   }
2755 
2756   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2757   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2758   *newmat = M;
2759 
2760   /* save submatrix used in processor for next request */
2761   if (call ==  MAT_INITIAL_MATRIX) {
2762     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2763     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2764   }
2765 
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 EXTERN_C_BEGIN
2770 #undef __FUNCT__
2771 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2772 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2773 {
2774   PetscInt       m,cstart, cend,j,nnz,i,d;
2775   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2776   const PetscInt *JJ;
2777   PetscScalar    *values;
2778   PetscErrorCode ierr;
2779 
2780   PetscFunctionBegin;
2781   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2782 
2783   B->rmap.bs = B->cmap.bs = 1;
2784   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2785   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2786   m      = B->rmap.n;
2787   cstart = B->cmap.rstart;
2788   cend   = B->cmap.rend;
2789   rstart = B->rmap.rstart;
2790 
2791   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2792   o_nnz = d_nnz + m;
2793 
2794   for (i=0; i<m; i++) {
2795     nnz     = Ii[i+1]- Ii[i];
2796     JJ      = J + Ii[i];
2797     nnz_max = PetscMax(nnz_max,nnz);
2798     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2799     for (j=0; j<nnz; j++) {
2800       if (*JJ >= cstart) break;
2801       JJ++;
2802     }
2803     d = 0;
2804     for (; j<nnz; j++) {
2805       if (*JJ++ >= cend) break;
2806       d++;
2807     }
2808     d_nnz[i] = d;
2809     o_nnz[i] = nnz - d;
2810   }
2811   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2812   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2813 
2814   if (v) values = (PetscScalar*)v;
2815   else {
2816     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2817     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2818   }
2819 
2820   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2821   for (i=0; i<m; i++) {
2822     ii   = i + rstart;
2823     nnz  = Ii[i+1]- Ii[i];
2824     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2825   }
2826   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2827   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2828   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2829 
2830   if (!v) {
2831     ierr = PetscFree(values);CHKERRQ(ierr);
2832   }
2833   PetscFunctionReturn(0);
2834 }
2835 EXTERN_C_END
2836 
2837 #undef __FUNCT__
2838 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2839 /*@
2840    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2841    (the default parallel PETSc format).
2842 
2843    Collective on MPI_Comm
2844 
2845    Input Parameters:
2846 +  B - the matrix
2847 .  i - the indices into j for the start of each local row (starts with zero)
2848 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2849 -  v - optional values in the matrix
2850 
2851    Level: developer
2852 
2853    Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal
2854      storage format. Thus changing the values in a[] after this call will not effect the matrix values.
2855 
2856 .keywords: matrix, aij, compressed row, sparse, parallel
2857 
2858 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2859           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2860 @*/
2861 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2862 {
2863   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2864 
2865   PetscFunctionBegin;
2866   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2867   if (f) {
2868     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2869   }
2870   PetscFunctionReturn(0);
2871 }
2872 
2873 #undef __FUNCT__
2874 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2875 /*@C
2876    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2877    (the default parallel PETSc format).  For good matrix assembly performance
2878    the user should preallocate the matrix storage by setting the parameters
2879    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2880    performance can be increased by more than a factor of 50.
2881 
2882    Collective on MPI_Comm
2883 
2884    Input Parameters:
2885 +  A - the matrix
2886 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2887            (same value is used for all local rows)
2888 .  d_nnz - array containing the number of nonzeros in the various rows of the
2889            DIAGONAL portion of the local submatrix (possibly different for each row)
2890            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2891            The size of this array is equal to the number of local rows, i.e 'm'.
2892            You must leave room for the diagonal entry even if it is zero.
2893 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2894            submatrix (same value is used for all local rows).
2895 -  o_nnz - array containing the number of nonzeros in the various rows of the
2896            OFF-DIAGONAL portion of the local submatrix (possibly different for
2897            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2898            structure. The size of this array is equal to the number
2899            of local rows, i.e 'm'.
2900 
2901    If the *_nnz parameter is given then the *_nz parameter is ignored
2902 
2903    The AIJ format (also called the Yale sparse matrix format or
2904    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2905    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2906 
2907    The parallel matrix is partitioned such that the first m0 rows belong to
2908    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2909    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2910 
2911    The DIAGONAL portion of the local submatrix of a processor can be defined
2912    as the submatrix which is obtained by extraction the part corresponding
2913    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2914    first row that belongs to the processor, and r2 is the last row belonging
2915    to the this processor. This is a square mxm matrix. The remaining portion
2916    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2917 
2918    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2919 
2920    Example usage:
2921 
2922    Consider the following 8x8 matrix with 34 non-zero values, that is
2923    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2924    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2925    as follows:
2926 
2927 .vb
2928             1  2  0  |  0  3  0  |  0  4
2929     Proc0   0  5  6  |  7  0  0  |  8  0
2930             9  0 10  | 11  0  0  | 12  0
2931     -------------------------------------
2932            13  0 14  | 15 16 17  |  0  0
2933     Proc1   0 18  0  | 19 20 21  |  0  0
2934             0  0  0  | 22 23  0  | 24  0
2935     -------------------------------------
2936     Proc2  25 26 27  |  0  0 28  | 29  0
2937            30  0  0  | 31 32 33  |  0 34
2938 .ve
2939 
2940    This can be represented as a collection of submatrices as:
2941 
2942 .vb
2943       A B C
2944       D E F
2945       G H I
2946 .ve
2947 
2948    Where the submatrices A,B,C are owned by proc0, D,E,F are
2949    owned by proc1, G,H,I are owned by proc2.
2950 
2951    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2952    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2953    The 'M','N' parameters are 8,8, and have the same values on all procs.
2954 
2955    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2956    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2957    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2958    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2959    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2960    matrix, ans [DF] as another SeqAIJ matrix.
2961 
2962    When d_nz, o_nz parameters are specified, d_nz storage elements are
2963    allocated for every row of the local diagonal submatrix, and o_nz
2964    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2965    One way to choose d_nz and o_nz is to use the max nonzerors per local
2966    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2967    In this case, the values of d_nz,o_nz are:
2968 .vb
2969      proc0 : dnz = 2, o_nz = 2
2970      proc1 : dnz = 3, o_nz = 2
2971      proc2 : dnz = 1, o_nz = 4
2972 .ve
2973    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2974    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2975    for proc3. i.e we are using 12+15+10=37 storage locations to store
2976    34 values.
2977 
2978    When d_nnz, o_nnz parameters are specified, the storage is specified
2979    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2980    In the above case the values for d_nnz,o_nnz are:
2981 .vb
2982      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2983      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2984      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2985 .ve
2986    Here the space allocated is sum of all the above values i.e 34, and
2987    hence pre-allocation is perfect.
2988 
2989    Level: intermediate
2990 
2991 .keywords: matrix, aij, compressed row, sparse, parallel
2992 
2993 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2994           MPIAIJ
2995 @*/
2996 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2997 {
2998   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2999 
3000   PetscFunctionBegin;
3001   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3002   if (f) {
3003     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3004   }
3005   PetscFunctionReturn(0);
3006 }
3007 
3008 #undef __FUNCT__
3009 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3010 /*@C
3011      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3012          CSR format the local rows.
3013 
3014    Collective on MPI_Comm
3015 
3016    Input Parameters:
3017 +  comm - MPI communicator
3018 .  m - number of local rows (Cannot be PETSC_DECIDE)
3019 .  n - This value should be the same as the local size used in creating the
3020        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3021        calculated if N is given) For square matrices n is almost always m.
3022 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3023 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3024 .   i - row indices
3025 .   j - column indices
3026 -   a - matrix values
3027 
3028    Output Parameter:
3029 .   mat - the matrix
3030 
3031    Level: intermediate
3032 
3033    Notes:
3034        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3035      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3036      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3037 
3038        The i and j indices are 0 based
3039 
3040 .keywords: matrix, aij, compressed row, sparse, parallel
3041 
3042 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3043           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3044 @*/
3045 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3046 {
3047   PetscErrorCode ierr;
3048 
3049  PetscFunctionBegin;
3050   if (i[0]) {
3051     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3052   }
3053   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3054   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3055   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3056   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3057   PetscFunctionReturn(0);
3058 }
3059 
3060 #undef __FUNCT__
3061 #define __FUNCT__ "MatCreateMPIAIJ"
3062 /*@C
3063    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3064    (the default parallel PETSc format).  For good matrix assembly performance
3065    the user should preallocate the matrix storage by setting the parameters
3066    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3067    performance can be increased by more than a factor of 50.
3068 
3069    Collective on MPI_Comm
3070 
3071    Input Parameters:
3072 +  comm - MPI communicator
3073 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3074            This value should be the same as the local size used in creating the
3075            y vector for the matrix-vector product y = Ax.
3076 .  n - This value should be the same as the local size used in creating the
3077        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3078        calculated if N is given) For square matrices n is almost always m.
3079 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3080 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3081 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3082            (same value is used for all local rows)
3083 .  d_nnz - array containing the number of nonzeros in the various rows of the
3084            DIAGONAL portion of the local submatrix (possibly different for each row)
3085            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3086            The size of this array is equal to the number of local rows, i.e 'm'.
3087            You must leave room for the diagonal entry even if it is zero.
3088 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3089            submatrix (same value is used for all local rows).
3090 -  o_nnz - array containing the number of nonzeros in the various rows of the
3091            OFF-DIAGONAL portion of the local submatrix (possibly different for
3092            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3093            structure. The size of this array is equal to the number
3094            of local rows, i.e 'm'.
3095 
3096    Output Parameter:
3097 .  A - the matrix
3098 
3099    Notes:
3100    If the *_nnz parameter is given then the *_nz parameter is ignored
3101 
3102    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3103    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3104    storage requirements for this matrix.
3105 
3106    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3107    processor than it must be used on all processors that share the object for
3108    that argument.
3109 
3110    The user MUST specify either the local or global matrix dimensions
3111    (possibly both).
3112 
3113    The parallel matrix is partitioned such that the first m0 rows belong to
3114    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3115    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3116 
3117    The DIAGONAL portion of the local submatrix of a processor can be defined
3118    as the submatrix which is obtained by extraction the part corresponding
3119    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3120    first row that belongs to the processor, and r2 is the last row belonging
3121    to the this processor. This is a square mxm matrix. The remaining portion
3122    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3123 
3124    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3125 
3126    When calling this routine with a single process communicator, a matrix of
3127    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3128    type of communicator, use the construction mechanism:
3129      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3130 
3131    By default, this format uses inodes (identical nodes) when possible.
3132    We search for consecutive rows with the same nonzero structure, thereby
3133    reusing matrix information to achieve increased efficiency.
3134 
3135    Options Database Keys:
3136 +  -mat_no_inode  - Do not use inodes
3137 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3138 -  -mat_aij_oneindex - Internally use indexing starting at 1
3139         rather than 0.  Note that when calling MatSetValues(),
3140         the user still MUST index entries starting at 0!
3141 
3142 
3143    Example usage:
3144 
3145    Consider the following 8x8 matrix with 34 non-zero values, that is
3146    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3147    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3148    as follows:
3149 
3150 .vb
3151             1  2  0  |  0  3  0  |  0  4
3152     Proc0   0  5  6  |  7  0  0  |  8  0
3153             9  0 10  | 11  0  0  | 12  0
3154     -------------------------------------
3155            13  0 14  | 15 16 17  |  0  0
3156     Proc1   0 18  0  | 19 20 21  |  0  0
3157             0  0  0  | 22 23  0  | 24  0
3158     -------------------------------------
3159     Proc2  25 26 27  |  0  0 28  | 29  0
3160            30  0  0  | 31 32 33  |  0 34
3161 .ve
3162 
3163    This can be represented as a collection of submatrices as:
3164 
3165 .vb
3166       A B C
3167       D E F
3168       G H I
3169 .ve
3170 
3171    Where the submatrices A,B,C are owned by proc0, D,E,F are
3172    owned by proc1, G,H,I are owned by proc2.
3173 
3174    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3175    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3176    The 'M','N' parameters are 8,8, and have the same values on all procs.
3177 
3178    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3179    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3180    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3181    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3182    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3183    matrix, ans [DF] as another SeqAIJ matrix.
3184 
3185    When d_nz, o_nz parameters are specified, d_nz storage elements are
3186    allocated for every row of the local diagonal submatrix, and o_nz
3187    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3188    One way to choose d_nz and o_nz is to use the max nonzerors per local
3189    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3190    In this case, the values of d_nz,o_nz are:
3191 .vb
3192      proc0 : dnz = 2, o_nz = 2
3193      proc1 : dnz = 3, o_nz = 2
3194      proc2 : dnz = 1, o_nz = 4
3195 .ve
3196    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3197    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3198    for proc3. i.e we are using 12+15+10=37 storage locations to store
3199    34 values.
3200 
3201    When d_nnz, o_nnz parameters are specified, the storage is specified
3202    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3203    In the above case the values for d_nnz,o_nnz are:
3204 .vb
3205      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3206      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3207      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3208 .ve
3209    Here the space allocated is sum of all the above values i.e 34, and
3210    hence pre-allocation is perfect.
3211 
3212    Level: intermediate
3213 
3214 .keywords: matrix, aij, compressed row, sparse, parallel
3215 
3216 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3217           MPIAIJ, MatCreateMPIAIJWithArrays()
3218 @*/
3219 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3220 {
3221   PetscErrorCode ierr;
3222   PetscMPIInt    size;
3223 
3224   PetscFunctionBegin;
3225   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3226   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3227   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3228   if (size > 1) {
3229     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3230     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3231   } else {
3232     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3233     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3234   }
3235   PetscFunctionReturn(0);
3236 }
3237 
3238 #undef __FUNCT__
3239 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3240 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3241 {
3242   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3243 
3244   PetscFunctionBegin;
3245   *Ad     = a->A;
3246   *Ao     = a->B;
3247   *colmap = a->garray;
3248   PetscFunctionReturn(0);
3249 }
3250 
3251 #undef __FUNCT__
3252 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3253 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3254 {
3255   PetscErrorCode ierr;
3256   PetscInt       i;
3257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3258 
3259   PetscFunctionBegin;
3260   if (coloring->ctype == IS_COLORING_GLOBAL) {
3261     ISColoringValue *allcolors,*colors;
3262     ISColoring      ocoloring;
3263 
3264     /* set coloring for diagonal portion */
3265     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3266 
3267     /* set coloring for off-diagonal portion */
3268     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3269     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3270     for (i=0; i<a->B->cmap.n; i++) {
3271       colors[i] = allcolors[a->garray[i]];
3272     }
3273     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3274     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3275     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3276     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3277   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3278     ISColoringValue *colors;
3279     PetscInt        *larray;
3280     ISColoring      ocoloring;
3281 
3282     /* set coloring for diagonal portion */
3283     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3284     for (i=0; i<a->A->cmap.n; i++) {
3285       larray[i] = i + A->cmap.rstart;
3286     }
3287     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3288     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3289     for (i=0; i<a->A->cmap.n; i++) {
3290       colors[i] = coloring->colors[larray[i]];
3291     }
3292     ierr = PetscFree(larray);CHKERRQ(ierr);
3293     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3294     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3295     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3296 
3297     /* set coloring for off-diagonal portion */
3298     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3299     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3300     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3301     for (i=0; i<a->B->cmap.n; i++) {
3302       colors[i] = coloring->colors[larray[i]];
3303     }
3304     ierr = PetscFree(larray);CHKERRQ(ierr);
3305     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3306     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3307     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3308   } else {
3309     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3310   }
3311 
3312   PetscFunctionReturn(0);
3313 }
3314 
3315 #if defined(PETSC_HAVE_ADIC)
3316 #undef __FUNCT__
3317 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3318 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3319 {
3320   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3321   PetscErrorCode ierr;
3322 
3323   PetscFunctionBegin;
3324   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3325   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3326   PetscFunctionReturn(0);
3327 }
3328 #endif
3329 
3330 #undef __FUNCT__
3331 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3332 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3333 {
3334   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3335   PetscErrorCode ierr;
3336 
3337   PetscFunctionBegin;
3338   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3339   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3340   PetscFunctionReturn(0);
3341 }
3342 
3343 #undef __FUNCT__
3344 #define __FUNCT__ "MatMerge"
3345 /*@C
3346       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3347                  matrices from each processor
3348 
3349     Collective on MPI_Comm
3350 
3351    Input Parameters:
3352 +    comm - the communicators the parallel matrix will live on
3353 .    inmat - the input sequential matrices
3354 .    n - number of local columns (or PETSC_DECIDE)
3355 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3356 
3357    Output Parameter:
3358 .    outmat - the parallel matrix generated
3359 
3360     Level: advanced
3361 
3362    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3363 
3364 @*/
3365 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3366 {
3367   PetscErrorCode ierr;
3368   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3369   PetscInt       *indx;
3370   PetscScalar    *values;
3371 
3372   PetscFunctionBegin;
3373   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3374   if (scall == MAT_INITIAL_MATRIX){
3375     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3376     if (n == PETSC_DECIDE){
3377       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3378     }
3379     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3380     rstart -= m;
3381 
3382     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3383     for (i=0;i<m;i++) {
3384       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3385       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3386       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3387     }
3388     /* This routine will ONLY return MPIAIJ type matrix */
3389     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3390     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3391     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3392     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3393     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3394 
3395   } else if (scall == MAT_REUSE_MATRIX){
3396     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3397   } else {
3398     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3399   }
3400 
3401   for (i=0;i<m;i++) {
3402     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3403     Ii    = i + rstart;
3404     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3405     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3406   }
3407   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3408   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3409   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3410 
3411   PetscFunctionReturn(0);
3412 }
3413 
3414 #undef __FUNCT__
3415 #define __FUNCT__ "MatFileSplit"
3416 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3417 {
3418   PetscErrorCode    ierr;
3419   PetscMPIInt       rank;
3420   PetscInt          m,N,i,rstart,nnz;
3421   size_t            len;
3422   const PetscInt    *indx;
3423   PetscViewer       out;
3424   char              *name;
3425   Mat               B;
3426   const PetscScalar *values;
3427 
3428   PetscFunctionBegin;
3429   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3430   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3431   /* Should this be the type of the diagonal block of A? */
3432   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3433   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3434   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3435   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3436   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3437   for (i=0;i<m;i++) {
3438     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3439     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3440     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3441   }
3442   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3443   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3444 
3445   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3446   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3447   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3448   sprintf(name,"%s.%d",outfile,rank);
3449   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3450   ierr = PetscFree(name);
3451   ierr = MatView(B,out);CHKERRQ(ierr);
3452   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3453   ierr = MatDestroy(B);CHKERRQ(ierr);
3454   PetscFunctionReturn(0);
3455 }
3456 
3457 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3458 #undef __FUNCT__
3459 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3460 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3461 {
3462   PetscErrorCode       ierr;
3463   Mat_Merge_SeqsToMPI  *merge;
3464   PetscContainer       container;
3465 
3466   PetscFunctionBegin;
3467   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3468   if (container) {
3469     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3470     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3471     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3472     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3473     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3474     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3475     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3476     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3477     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3478     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3479     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3480     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3481 
3482     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3483     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3484   }
3485   ierr = PetscFree(merge);CHKERRQ(ierr);
3486 
3487   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3488   PetscFunctionReturn(0);
3489 }
3490 
3491 #include "src/mat/utils/freespace.h"
3492 #include "petscbt.h"
3493 static PetscEvent logkey_seqstompinum = 0;
3494 #undef __FUNCT__
3495 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3496 /*@C
3497       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3498                  matrices from each processor
3499 
3500     Collective on MPI_Comm
3501 
3502    Input Parameters:
3503 +    comm - the communicators the parallel matrix will live on
3504 .    seqmat - the input sequential matrices
3505 .    m - number of local rows (or PETSC_DECIDE)
3506 .    n - number of local columns (or PETSC_DECIDE)
3507 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3508 
3509    Output Parameter:
3510 .    mpimat - the parallel matrix generated
3511 
3512     Level: advanced
3513 
3514    Notes:
3515      The dimensions of the sequential matrix in each processor MUST be the same.
3516      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3517      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3518 @*/
3519 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3520 {
3521   PetscErrorCode       ierr;
3522   MPI_Comm             comm=mpimat->comm;
3523   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3524   PetscMPIInt          size,rank,taga,*len_s;
3525   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3526   PetscInt             proc,m;
3527   PetscInt             **buf_ri,**buf_rj;
3528   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3529   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3530   MPI_Request          *s_waits,*r_waits;
3531   MPI_Status           *status;
3532   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3533   Mat_Merge_SeqsToMPI  *merge;
3534   PetscContainer       container;
3535 
3536   PetscFunctionBegin;
3537   if (!logkey_seqstompinum) {
3538     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3539   }
3540   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3541 
3542   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3543   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3544 
3545   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3546   if (container) {
3547     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3548   }
3549   bi     = merge->bi;
3550   bj     = merge->bj;
3551   buf_ri = merge->buf_ri;
3552   buf_rj = merge->buf_rj;
3553 
3554   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3555   owners = merge->rowmap.range;
3556   len_s  = merge->len_s;
3557 
3558   /* send and recv matrix values */
3559   /*-----------------------------*/
3560   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3561   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3562 
3563   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3564   for (proc=0,k=0; proc<size; proc++){
3565     if (!len_s[proc]) continue;
3566     i = owners[proc];
3567     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3568     k++;
3569   }
3570 
3571   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3572   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3573   ierr = PetscFree(status);CHKERRQ(ierr);
3574 
3575   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3576   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3577 
3578   /* insert mat values of mpimat */
3579   /*----------------------------*/
3580   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3581   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3582   nextrow = buf_ri_k + merge->nrecv;
3583   nextai  = nextrow + merge->nrecv;
3584 
3585   for (k=0; k<merge->nrecv; k++){
3586     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3587     nrows = *(buf_ri_k[k]);
3588     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3589     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3590   }
3591 
3592   /* set values of ba */
3593   m = merge->rowmap.n;
3594   for (i=0; i<m; i++) {
3595     arow = owners[rank] + i;
3596     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3597     bnzi = bi[i+1] - bi[i];
3598     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3599 
3600     /* add local non-zero vals of this proc's seqmat into ba */
3601     anzi = ai[arow+1] - ai[arow];
3602     aj   = a->j + ai[arow];
3603     aa   = a->a + ai[arow];
3604     nextaj = 0;
3605     for (j=0; nextaj<anzi; j++){
3606       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3607         ba_i[j] += aa[nextaj++];
3608       }
3609     }
3610 
3611     /* add received vals into ba */
3612     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3613       /* i-th row */
3614       if (i == *nextrow[k]) {
3615         anzi = *(nextai[k]+1) - *nextai[k];
3616         aj   = buf_rj[k] + *(nextai[k]);
3617         aa   = abuf_r[k] + *(nextai[k]);
3618         nextaj = 0;
3619         for (j=0; nextaj<anzi; j++){
3620           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3621             ba_i[j] += aa[nextaj++];
3622           }
3623         }
3624         nextrow[k]++; nextai[k]++;
3625       }
3626     }
3627     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3628   }
3629   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3630   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631 
3632   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3633   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3634   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3635   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3636   PetscFunctionReturn(0);
3637 }
3638 
3639 static PetscEvent logkey_seqstompisym = 0;
3640 #undef __FUNCT__
3641 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3642 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3643 {
3644   PetscErrorCode       ierr;
3645   Mat                  B_mpi;
3646   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3647   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3648   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3649   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3650   PetscInt             len,proc,*dnz,*onz;
3651   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3652   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3653   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3654   MPI_Status           *status;
3655   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3656   PetscBT              lnkbt;
3657   Mat_Merge_SeqsToMPI  *merge;
3658   PetscContainer       container;
3659 
3660   PetscFunctionBegin;
3661   if (!logkey_seqstompisym) {
3662     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3663   }
3664   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3665 
3666   /* make sure it is a PETSc comm */
3667   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3668   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3669   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3670 
3671   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3672   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3673 
3674   /* determine row ownership */
3675   /*---------------------------------------------------------*/
3676   merge->rowmap.n = m;
3677   merge->rowmap.N = M;
3678   merge->rowmap.bs = 1;
3679   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3680   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3681   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3682 
3683   m      = merge->rowmap.n;
3684   M      = merge->rowmap.N;
3685   owners = merge->rowmap.range;
3686 
3687   /* determine the number of messages to send, their lengths */
3688   /*---------------------------------------------------------*/
3689   len_s  = merge->len_s;
3690 
3691   len = 0;  /* length of buf_si[] */
3692   merge->nsend = 0;
3693   for (proc=0; proc<size; proc++){
3694     len_si[proc] = 0;
3695     if (proc == rank){
3696       len_s[proc] = 0;
3697     } else {
3698       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3699       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3700     }
3701     if (len_s[proc]) {
3702       merge->nsend++;
3703       nrows = 0;
3704       for (i=owners[proc]; i<owners[proc+1]; i++){
3705         if (ai[i+1] > ai[i]) nrows++;
3706       }
3707       len_si[proc] = 2*(nrows+1);
3708       len += len_si[proc];
3709     }
3710   }
3711 
3712   /* determine the number and length of messages to receive for ij-structure */
3713   /*-------------------------------------------------------------------------*/
3714   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3715   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3716 
3717   /* post the Irecv of j-structure */
3718   /*-------------------------------*/
3719   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3720   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3721 
3722   /* post the Isend of j-structure */
3723   /*--------------------------------*/
3724   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3725   sj_waits = si_waits + merge->nsend;
3726 
3727   for (proc=0, k=0; proc<size; proc++){
3728     if (!len_s[proc]) continue;
3729     i = owners[proc];
3730     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3731     k++;
3732   }
3733 
3734   /* receives and sends of j-structure are complete */
3735   /*------------------------------------------------*/
3736   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3737   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3738 
3739   /* send and recv i-structure */
3740   /*---------------------------*/
3741   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3742   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3743 
3744   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3745   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3746   for (proc=0,k=0; proc<size; proc++){
3747     if (!len_s[proc]) continue;
3748     /* form outgoing message for i-structure:
3749          buf_si[0]:                 nrows to be sent
3750                [1:nrows]:           row index (global)
3751                [nrows+1:2*nrows+1]: i-structure index
3752     */
3753     /*-------------------------------------------*/
3754     nrows = len_si[proc]/2 - 1;
3755     buf_si_i    = buf_si + nrows+1;
3756     buf_si[0]   = nrows;
3757     buf_si_i[0] = 0;
3758     nrows = 0;
3759     for (i=owners[proc]; i<owners[proc+1]; i++){
3760       anzi = ai[i+1] - ai[i];
3761       if (anzi) {
3762         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3763         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3764         nrows++;
3765       }
3766     }
3767     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3768     k++;
3769     buf_si += len_si[proc];
3770   }
3771 
3772   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3773   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3774 
3775   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3776   for (i=0; i<merge->nrecv; i++){
3777     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3778   }
3779 
3780   ierr = PetscFree(len_si);CHKERRQ(ierr);
3781   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3782   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3783   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3784   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3785   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3786   ierr = PetscFree(status);CHKERRQ(ierr);
3787 
3788   /* compute a local seq matrix in each processor */
3789   /*----------------------------------------------*/
3790   /* allocate bi array and free space for accumulating nonzero column info */
3791   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3792   bi[0] = 0;
3793 
3794   /* create and initialize a linked list */
3795   nlnk = N+1;
3796   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3797 
3798   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3799   len = 0;
3800   len  = ai[owners[rank+1]] - ai[owners[rank]];
3801   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3802   current_space = free_space;
3803 
3804   /* determine symbolic info for each local row */
3805   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3806   nextrow = buf_ri_k + merge->nrecv;
3807   nextai  = nextrow + merge->nrecv;
3808   for (k=0; k<merge->nrecv; k++){
3809     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3810     nrows = *buf_ri_k[k];
3811     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3812     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3813   }
3814 
3815   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3816   len = 0;
3817   for (i=0;i<m;i++) {
3818     bnzi   = 0;
3819     /* add local non-zero cols of this proc's seqmat into lnk */
3820     arow   = owners[rank] + i;
3821     anzi   = ai[arow+1] - ai[arow];
3822     aj     = a->j + ai[arow];
3823     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3824     bnzi += nlnk;
3825     /* add received col data into lnk */
3826     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3827       if (i == *nextrow[k]) { /* i-th row */
3828         anzi = *(nextai[k]+1) - *nextai[k];
3829         aj   = buf_rj[k] + *nextai[k];
3830         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3831         bnzi += nlnk;
3832         nextrow[k]++; nextai[k]++;
3833       }
3834     }
3835     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3836 
3837     /* if free space is not available, make more free space */
3838     if (current_space->local_remaining<bnzi) {
3839       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3840       nspacedouble++;
3841     }
3842     /* copy data into free space, then initialize lnk */
3843     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3844     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3845 
3846     current_space->array           += bnzi;
3847     current_space->local_used      += bnzi;
3848     current_space->local_remaining -= bnzi;
3849 
3850     bi[i+1] = bi[i] + bnzi;
3851   }
3852 
3853   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3854 
3855   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3856   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3857   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3858 
3859   /* create symbolic parallel matrix B_mpi */
3860   /*---------------------------------------*/
3861   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3862   if (n==PETSC_DECIDE) {
3863     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3864   } else {
3865     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3866   }
3867   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3868   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3869   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3870 
3871   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3872   B_mpi->assembled     = PETSC_FALSE;
3873   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3874   merge->bi            = bi;
3875   merge->bj            = bj;
3876   merge->buf_ri        = buf_ri;
3877   merge->buf_rj        = buf_rj;
3878   merge->coi           = PETSC_NULL;
3879   merge->coj           = PETSC_NULL;
3880   merge->owners_co     = PETSC_NULL;
3881 
3882   /* attach the supporting struct to B_mpi for reuse */
3883   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3884   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3885   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3886   *mpimat = B_mpi;
3887 
3888   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3889   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3890   PetscFunctionReturn(0);
3891 }
3892 
3893 static PetscEvent logkey_seqstompi = 0;
3894 #undef __FUNCT__
3895 #define __FUNCT__ "MatMerge_SeqsToMPI"
3896 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3897 {
3898   PetscErrorCode   ierr;
3899 
3900   PetscFunctionBegin;
3901   if (!logkey_seqstompi) {
3902     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3903   }
3904   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3905   if (scall == MAT_INITIAL_MATRIX){
3906     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3907   }
3908   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3909   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3910   PetscFunctionReturn(0);
3911 }
3912 static PetscEvent logkey_getlocalmat = 0;
3913 #undef __FUNCT__
3914 #define __FUNCT__ "MatGetLocalMat"
3915 /*@C
3916      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3917 
3918     Not Collective
3919 
3920    Input Parameters:
3921 +    A - the matrix
3922 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3923 
3924    Output Parameter:
3925 .    A_loc - the local sequential matrix generated
3926 
3927     Level: developer
3928 
3929 @*/
3930 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3931 {
3932   PetscErrorCode  ierr;
3933   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3934   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3935   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3936   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3937   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3938   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3939 
3940   PetscFunctionBegin;
3941   if (!logkey_getlocalmat) {
3942     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3943   }
3944   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3945   if (scall == MAT_INITIAL_MATRIX){
3946     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3947     ci[0] = 0;
3948     for (i=0; i<am; i++){
3949       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3950     }
3951     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3952     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3953     k = 0;
3954     for (i=0; i<am; i++) {
3955       ncols_o = bi[i+1] - bi[i];
3956       ncols_d = ai[i+1] - ai[i];
3957       /* off-diagonal portion of A */
3958       for (jo=0; jo<ncols_o; jo++) {
3959         col = cmap[*bj];
3960         if (col >= cstart) break;
3961         cj[k]   = col; bj++;
3962         ca[k++] = *ba++;
3963       }
3964       /* diagonal portion of A */
3965       for (j=0; j<ncols_d; j++) {
3966         cj[k]   = cstart + *aj++;
3967         ca[k++] = *aa++;
3968       }
3969       /* off-diagonal portion of A */
3970       for (j=jo; j<ncols_o; j++) {
3971         cj[k]   = cmap[*bj++];
3972         ca[k++] = *ba++;
3973       }
3974     }
3975     /* put together the new matrix */
3976     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3977     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3978     /* Since these are PETSc arrays, change flags to free them as necessary. */
3979     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3980     mat->free_a  = PETSC_TRUE;
3981     mat->free_ij = PETSC_TRUE;
3982     mat->nonew   = 0;
3983   } else if (scall == MAT_REUSE_MATRIX){
3984     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3985     ci = mat->i; cj = mat->j; ca = mat->a;
3986     for (i=0; i<am; i++) {
3987       /* off-diagonal portion of A */
3988       ncols_o = bi[i+1] - bi[i];
3989       for (jo=0; jo<ncols_o; jo++) {
3990         col = cmap[*bj];
3991         if (col >= cstart) break;
3992         *ca++ = *ba++; bj++;
3993       }
3994       /* diagonal portion of A */
3995       ncols_d = ai[i+1] - ai[i];
3996       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3997       /* off-diagonal portion of A */
3998       for (j=jo; j<ncols_o; j++) {
3999         *ca++ = *ba++; bj++;
4000       }
4001     }
4002   } else {
4003     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4004   }
4005 
4006   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4007   PetscFunctionReturn(0);
4008 }
4009 
4010 static PetscEvent logkey_getlocalmatcondensed = 0;
4011 #undef __FUNCT__
4012 #define __FUNCT__ "MatGetLocalMatCondensed"
4013 /*@C
4014      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4015 
4016     Not Collective
4017 
4018    Input Parameters:
4019 +    A - the matrix
4020 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4021 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4022 
4023    Output Parameter:
4024 .    A_loc - the local sequential matrix generated
4025 
4026     Level: developer
4027 
4028 @*/
4029 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4030 {
4031   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4032   PetscErrorCode    ierr;
4033   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4034   IS                isrowa,iscola;
4035   Mat               *aloc;
4036 
4037   PetscFunctionBegin;
4038   if (!logkey_getlocalmatcondensed) {
4039     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
4040   }
4041   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4042   if (!row){
4043     start = A->rmap.rstart; end = A->rmap.rend;
4044     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4045   } else {
4046     isrowa = *row;
4047   }
4048   if (!col){
4049     start = A->cmap.rstart;
4050     cmap  = a->garray;
4051     nzA   = a->A->cmap.n;
4052     nzB   = a->B->cmap.n;
4053     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4054     ncols = 0;
4055     for (i=0; i<nzB; i++) {
4056       if (cmap[i] < start) idx[ncols++] = cmap[i];
4057       else break;
4058     }
4059     imark = i;
4060     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4061     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4062     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4063     ierr = PetscFree(idx);CHKERRQ(ierr);
4064   } else {
4065     iscola = *col;
4066   }
4067   if (scall != MAT_INITIAL_MATRIX){
4068     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4069     aloc[0] = *A_loc;
4070   }
4071   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4072   *A_loc = aloc[0];
4073   ierr = PetscFree(aloc);CHKERRQ(ierr);
4074   if (!row){
4075     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4076   }
4077   if (!col){
4078     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4079   }
4080   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4081   PetscFunctionReturn(0);
4082 }
4083 
4084 static PetscEvent logkey_GetBrowsOfAcols = 0;
4085 #undef __FUNCT__
4086 #define __FUNCT__ "MatGetBrowsOfAcols"
4087 /*@C
4088     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4089 
4090     Collective on Mat
4091 
4092    Input Parameters:
4093 +    A,B - the matrices in mpiaij format
4094 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4095 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4096 
4097    Output Parameter:
4098 +    rowb, colb - index sets of rows and columns of B to extract
4099 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
4100 -    B_seq - the sequential matrix generated
4101 
4102     Level: developer
4103 
4104 @*/
4105 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4106 {
4107   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4108   PetscErrorCode    ierr;
4109   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4110   IS                isrowb,iscolb;
4111   Mat               *bseq;
4112 
4113   PetscFunctionBegin;
4114   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4115     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4116   }
4117   if (!logkey_GetBrowsOfAcols) {
4118     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
4119   }
4120   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4121 
4122   if (scall == MAT_INITIAL_MATRIX){
4123     start = A->cmap.rstart;
4124     cmap  = a->garray;
4125     nzA   = a->A->cmap.n;
4126     nzB   = a->B->cmap.n;
4127     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4128     ncols = 0;
4129     for (i=0; i<nzB; i++) {  /* row < local row index */
4130       if (cmap[i] < start) idx[ncols++] = cmap[i];
4131       else break;
4132     }
4133     imark = i;
4134     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4135     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4136     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4137     ierr = PetscFree(idx);CHKERRQ(ierr);
4138     *brstart = imark;
4139     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4140   } else {
4141     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4142     isrowb = *rowb; iscolb = *colb;
4143     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4144     bseq[0] = *B_seq;
4145   }
4146   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4147   *B_seq = bseq[0];
4148   ierr = PetscFree(bseq);CHKERRQ(ierr);
4149   if (!rowb){
4150     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4151   } else {
4152     *rowb = isrowb;
4153   }
4154   if (!colb){
4155     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4156   } else {
4157     *colb = iscolb;
4158   }
4159   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4160   PetscFunctionReturn(0);
4161 }
4162 
4163 static PetscEvent logkey_GetBrowsOfAocols = 0;
4164 #undef __FUNCT__
4165 #define __FUNCT__ "MatGetBrowsOfAoCols"
4166 /*@C
4167     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4168     of the OFF-DIAGONAL portion of local A
4169 
4170     Collective on Mat
4171 
4172    Input Parameters:
4173 +    A,B - the matrices in mpiaij format
4174 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4175 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4176 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4177 
4178    Output Parameter:
4179 +    B_oth - the sequential matrix generated
4180 
4181     Level: developer
4182 
4183 @*/
4184 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4185 {
4186   VecScatter_MPI_General *gen_to,*gen_from;
4187   PetscErrorCode         ierr;
4188   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4189   Mat_SeqAIJ             *b_oth;
4190   VecScatter             ctx=a->Mvctx;
4191   MPI_Comm               comm=ctx->comm;
4192   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4193   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4194   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4195   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4196   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4197   MPI_Status             *sstatus,rstatus;
4198   PetscInt               *cols,sbs,rbs;
4199   PetscScalar            *vals;
4200 
4201   PetscFunctionBegin;
4202   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4203     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4204   }
4205   if (!logkey_GetBrowsOfAocols) {
4206     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
4207   }
4208   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4209   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4210 
4211   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4212   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4213   rvalues  = gen_from->values; /* holds the length of receiving row */
4214   svalues  = gen_to->values;   /* holds the length of sending row */
4215   nrecvs   = gen_from->n;
4216   nsends   = gen_to->n;
4217 
4218   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4219   srow     = gen_to->indices;   /* local row index to be sent */
4220   sstarts  = gen_to->starts;
4221   sprocs   = gen_to->procs;
4222   sstatus  = gen_to->sstatus;
4223   sbs      = gen_to->bs;
4224   rstarts  = gen_from->starts;
4225   rprocs   = gen_from->procs;
4226   rbs      = gen_from->bs;
4227 
4228   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4229   if (scall == MAT_INITIAL_MATRIX){
4230     /* i-array */
4231     /*---------*/
4232     /*  post receives */
4233     for (i=0; i<nrecvs; i++){
4234       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4235       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4236       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4237     }
4238 
4239     /* pack the outgoing message */
4240     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4241     rstartsj = sstartsj + nsends +1;
4242     sstartsj[0] = 0;  rstartsj[0] = 0;
4243     len = 0; /* total length of j or a array to be sent */
4244     k = 0;
4245     for (i=0; i<nsends; i++){
4246       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4247       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4248       for (j=0; j<nrows; j++) {
4249         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4250         for (l=0; l<sbs; l++){
4251           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4252           rowlen[j*sbs+l] = ncols;
4253           len += ncols;
4254           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4255         }
4256         k++;
4257       }
4258       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4259       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4260     }
4261     /* recvs and sends of i-array are completed */
4262     i = nrecvs;
4263     while (i--) {
4264       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4265     }
4266     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4267 
4268     /* allocate buffers for sending j and a arrays */
4269     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4270     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4271 
4272     /* create i-array of B_oth */
4273     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4274     b_othi[0] = 0;
4275     len = 0; /* total length of j or a array to be received */
4276     k = 0;
4277     for (i=0; i<nrecvs; i++){
4278       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4279       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4280       for (j=0; j<nrows; j++) {
4281         b_othi[k+1] = b_othi[k] + rowlen[j];
4282         len += rowlen[j]; k++;
4283       }
4284       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4285     }
4286 
4287     /* allocate space for j and a arrrays of B_oth */
4288     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4289     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4290 
4291     /* j-array */
4292     /*---------*/
4293     /*  post receives of j-array */
4294     for (i=0; i<nrecvs; i++){
4295       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4296       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4297     }
4298 
4299     /* pack the outgoing message j-array */
4300     k = 0;
4301     for (i=0; i<nsends; i++){
4302       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4303       bufJ = bufj+sstartsj[i];
4304       for (j=0; j<nrows; j++) {
4305         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4306         for (ll=0; ll<sbs; ll++){
4307           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4308           for (l=0; l<ncols; l++){
4309             *bufJ++ = cols[l];
4310           }
4311           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4312         }
4313       }
4314       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4315     }
4316 
4317     /* recvs and sends of j-array are completed */
4318     i = nrecvs;
4319     while (i--) {
4320       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4321     }
4322     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4323   } else if (scall == MAT_REUSE_MATRIX){
4324     sstartsj = *startsj;
4325     rstartsj = sstartsj + nsends +1;
4326     bufa     = *bufa_ptr;
4327     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4328     b_otha   = b_oth->a;
4329   } else {
4330     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4331   }
4332 
4333   /* a-array */
4334   /*---------*/
4335   /*  post receives of a-array */
4336   for (i=0; i<nrecvs; i++){
4337     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4338     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4339   }
4340 
4341   /* pack the outgoing message a-array */
4342   k = 0;
4343   for (i=0; i<nsends; i++){
4344     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4345     bufA = bufa+sstartsj[i];
4346     for (j=0; j<nrows; j++) {
4347       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4348       for (ll=0; ll<sbs; ll++){
4349         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4350         for (l=0; l<ncols; l++){
4351           *bufA++ = vals[l];
4352         }
4353         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4354       }
4355     }
4356     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4357   }
4358   /* recvs and sends of a-array are completed */
4359   i = nrecvs;
4360   while (i--) {
4361     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4362   }
4363   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4364   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4365 
4366   if (scall == MAT_INITIAL_MATRIX){
4367     /* put together the new matrix */
4368     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4369 
4370     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4371     /* Since these are PETSc arrays, change flags to free them as necessary. */
4372     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4373     b_oth->free_a  = PETSC_TRUE;
4374     b_oth->free_ij = PETSC_TRUE;
4375     b_oth->nonew   = 0;
4376 
4377     ierr = PetscFree(bufj);CHKERRQ(ierr);
4378     if (!startsj || !bufa_ptr){
4379       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4380       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4381     } else {
4382       *startsj  = sstartsj;
4383       *bufa_ptr = bufa;
4384     }
4385   }
4386   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4387   PetscFunctionReturn(0);
4388 }
4389 
4390 #undef __FUNCT__
4391 #define __FUNCT__ "MatGetCommunicationStructs"
4392 /*@C
4393   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4394 
4395   Not Collective
4396 
4397   Input Parameters:
4398 . A - The matrix in mpiaij format
4399 
4400   Output Parameter:
4401 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4402 . colmap - A map from global column index to local index into lvec
4403 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4404 
4405   Level: developer
4406 
4407 @*/
4408 #if defined (PETSC_USE_CTABLE)
4409 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4410 #else
4411 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4412 #endif
4413 {
4414   Mat_MPIAIJ *a;
4415 
4416   PetscFunctionBegin;
4417   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4418   PetscValidPointer(lvec, 2)
4419   PetscValidPointer(colmap, 3)
4420   PetscValidPointer(multScatter, 4)
4421   a = (Mat_MPIAIJ *) A->data;
4422   if (lvec) *lvec = a->lvec;
4423   if (colmap) *colmap = a->colmap;
4424   if (multScatter) *multScatter = a->Mvctx;
4425   PetscFunctionReturn(0);
4426 }
4427 
4428 EXTERN_C_BEGIN
4429 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4430 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4431 EXTERN_C_END
4432 
4433 /*MC
4434    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4435 
4436    Options Database Keys:
4437 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4438 
4439   Level: beginner
4440 
4441 .seealso: MatCreateMPIAIJ
4442 M*/
4443 
4444 EXTERN_C_BEGIN
4445 #undef __FUNCT__
4446 #define __FUNCT__ "MatCreate_MPIAIJ"
4447 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4448 {
4449   Mat_MPIAIJ     *b;
4450   PetscErrorCode ierr;
4451   PetscMPIInt    size;
4452 
4453   PetscFunctionBegin;
4454   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4455 
4456   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
4457   B->data         = (void*)b;
4458   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4459   B->factor       = 0;
4460   B->rmap.bs      = 1;
4461   B->assembled    = PETSC_FALSE;
4462   B->mapping      = 0;
4463 
4464   B->insertmode      = NOT_SET_VALUES;
4465   b->size            = size;
4466   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4467 
4468   /* build cache for off array entries formed */
4469   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4470   b->donotstash  = PETSC_FALSE;
4471   b->colmap      = 0;
4472   b->garray      = 0;
4473   b->roworiented = PETSC_TRUE;
4474 
4475   /* stuff used for matrix vector multiply */
4476   b->lvec      = PETSC_NULL;
4477   b->Mvctx     = PETSC_NULL;
4478 
4479   /* stuff for MatGetRow() */
4480   b->rowindices   = 0;
4481   b->rowvalues    = 0;
4482   b->getrowactive = PETSC_FALSE;
4483 
4484 
4485   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4486                                      "MatStoreValues_MPIAIJ",
4487                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4488   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4489                                      "MatRetrieveValues_MPIAIJ",
4490                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4492 				     "MatGetDiagonalBlock_MPIAIJ",
4493                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4494   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4495 				     "MatIsTranspose_MPIAIJ",
4496 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4497   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4498 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4499 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4500   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4501 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4502 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4503   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4504 				     "MatDiagonalScaleLocal_MPIAIJ",
4505 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4506   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4507                                      "MatConvert_MPIAIJ_MPICSRPERM",
4508                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4509   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4510                                      "MatConvert_MPIAIJ_MPICRL",
4511                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4512   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4513   PetscFunctionReturn(0);
4514 }
4515 EXTERN_C_END
4516 
4517 #undef __FUNCT__
4518 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4519 /*@C
4520      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4521          and "off-diagonal" part of the matrix in CSR format.
4522 
4523    Collective on MPI_Comm
4524 
4525    Input Parameters:
4526 +  comm - MPI communicator
4527 .  m - number of local rows (Cannot be PETSC_DECIDE)
4528 .  n - This value should be the same as the local size used in creating the
4529        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4530        calculated if N is given) For square matrices n is almost always m.
4531 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4532 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4533 .   i - row indices for "diagonal" portion of matrix
4534 .   j - column indices
4535 .   a - matrix values
4536 .   oi - row indices for "off-diagonal" portion of matrix
4537 .   oj - column indices
4538 -   oa - matrix values
4539 
4540    Output Parameter:
4541 .   mat - the matrix
4542 
4543    Level: advanced
4544 
4545    Notes:
4546        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4547 
4548        The i and j indices are 0 based
4549 
4550        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4551 
4552 
4553 .keywords: matrix, aij, compressed row, sparse, parallel
4554 
4555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4556           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4557 @*/
4558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4559 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4560 {
4561   PetscErrorCode ierr;
4562   Mat_MPIAIJ     *maij;
4563 
4564  PetscFunctionBegin;
4565   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4566   if (i[0]) {
4567     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4568   }
4569   if (oi[0]) {
4570     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4571   }
4572   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4573   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4574   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4575   maij = (Mat_MPIAIJ*) (*mat)->data;
4576   maij->donotstash     = PETSC_TRUE;
4577   (*mat)->preallocated = PETSC_TRUE;
4578 
4579   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4580   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr);
4581   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr);
4582 
4583   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4584   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4585 
4586   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4587   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4588   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4589   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4590 
4591   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4592   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4593   PetscFunctionReturn(0);
4594 }
4595 
4596 /*
4597     Special version for direct calls from Fortran
4598 */
4599 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4600 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4601 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4602 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4603 #endif
4604 
4605 /* Change these macros so can be used in void function */
4606 #undef CHKERRQ
4607 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4608 #undef SETERRQ2
4609 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4610 #undef SETERRQ
4611 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4612 
4613 EXTERN_C_BEGIN
4614 #undef __FUNCT__
4615 #define __FUNCT__ "matsetvaluesmpiaij_"
4616 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4617 {
4618   Mat            mat = *mmat;
4619   PetscInt       m = *mm, n = *mn;
4620   InsertMode     addv = *maddv;
4621   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4622   PetscScalar    value;
4623   PetscErrorCode ierr;
4624 
4625   MatPreallocated(mat);
4626   if (mat->insertmode == NOT_SET_VALUES) {
4627     mat->insertmode = addv;
4628   }
4629 #if defined(PETSC_USE_DEBUG)
4630   else if (mat->insertmode != addv) {
4631     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4632   }
4633 #endif
4634   {
4635   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4636   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4637   PetscTruth     roworiented = aij->roworiented;
4638 
4639   /* Some Variables required in the macro */
4640   Mat            A = aij->A;
4641   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4642   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4643   PetscScalar    *aa = a->a;
4644   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4645   Mat            B = aij->B;
4646   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4647   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4648   PetscScalar    *ba = b->a;
4649 
4650   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4651   PetscInt       nonew = a->nonew;
4652   PetscScalar    *ap1,*ap2;
4653 
4654   PetscFunctionBegin;
4655   for (i=0; i<m; i++) {
4656     if (im[i] < 0) continue;
4657 #if defined(PETSC_USE_DEBUG)
4658     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4659 #endif
4660     if (im[i] >= rstart && im[i] < rend) {
4661       row      = im[i] - rstart;
4662       lastcol1 = -1;
4663       rp1      = aj + ai[row];
4664       ap1      = aa + ai[row];
4665       rmax1    = aimax[row];
4666       nrow1    = ailen[row];
4667       low1     = 0;
4668       high1    = nrow1;
4669       lastcol2 = -1;
4670       rp2      = bj + bi[row];
4671       ap2      = ba + bi[row];
4672       rmax2    = bimax[row];
4673       nrow2    = bilen[row];
4674       low2     = 0;
4675       high2    = nrow2;
4676 
4677       for (j=0; j<n; j++) {
4678         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4679         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4680         if (in[j] >= cstart && in[j] < cend){
4681           col = in[j] - cstart;
4682           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4683         } else if (in[j] < 0) continue;
4684 #if defined(PETSC_USE_DEBUG)
4685         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4686 #endif
4687         else {
4688           if (mat->was_assembled) {
4689             if (!aij->colmap) {
4690               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4691             }
4692 #if defined (PETSC_USE_CTABLE)
4693             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4694 	    col--;
4695 #else
4696             col = aij->colmap[in[j]] - 1;
4697 #endif
4698             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4699               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4700               col =  in[j];
4701               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4702               B = aij->B;
4703               b = (Mat_SeqAIJ*)B->data;
4704               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4705               rp2      = bj + bi[row];
4706               ap2      = ba + bi[row];
4707               rmax2    = bimax[row];
4708               nrow2    = bilen[row];
4709               low2     = 0;
4710               high2    = nrow2;
4711               bm       = aij->B->rmap.n;
4712               ba = b->a;
4713             }
4714           } else col = in[j];
4715           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4716         }
4717       }
4718     } else {
4719       if (!aij->donotstash) {
4720         if (roworiented) {
4721           if (ignorezeroentries && v[i*n] == 0.0) continue;
4722           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4723         } else {
4724           if (ignorezeroentries && v[i] == 0.0) continue;
4725           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4726         }
4727       }
4728     }
4729   }}
4730   PetscFunctionReturnVoid();
4731 }
4732 EXTERN_C_END
4733 
4734