xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 32b366c8169cf4a7c355a71e234e9efbfdf6409e) !
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash-table cost; without it, it is not scalable (each
processor has an order-N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
/*
   Builds aij->colmap, which maps a global column number to
   (local off-diagonal column index)+1; a lookup result of 0 therefore
   means "this global column is not present in the B (off-diagonal) block".
*/
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;   /* number of columns actually used by the B block */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash table: both key and value are stored shifted by +1 because the table cannot hold 0 */
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  /* dense array of length cmap.N: O(1) lookup but O(N) memory per process (not scalable) */
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; /* +1 so that 0 marks "absent" */
#endif
  PetscFunctionReturn(0);
}
35 
36 
#define CHUNKSIZE   15
/*
   MatSetValues_SeqAIJ_A_Private - inserts or adds one (row,col,value)
   entry into the diagonal block A.  Relies on the caller
   (MatSetValues_MPIAIJ) to have set up the per-row work variables:
   rp1 (column indices of the row), ap1 (values), nrow1 (current row
   length), rmax1 (allocated length), low1/high1 (search window) and
   lastcol1 (previous column, used to shrink the window for sorted input).
   Search is a bisection down to a window of 5 entries followed by a
   linear scan; if the column is absent, a new nonzero is inserted
   (possibly reallocating via MatSeqXAIJReallocateAIJ) unless suppressed
   by nonew or ignorezeroentries.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
      if (nonew == 1) goto a_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
70 
71 
/*
   MatSetValues_SeqAIJ_B_Private - identical in structure to the _A_
   variant above but operates on the off-diagonal block B, using the
   rp2/ap2/nrow2/rmax2/low2/high2/lastcol2 work variables set up by
   MatSetValues_MPIAIJ.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
       for (_i=low2; _i<high2; _i++) { \
        if (rp2[_i] > col) break; \
        if (rp2[_i] == col) { \
          if (addv == ADD_VALUES) ap2[_i] += value;   \
          else                    ap2[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
      if (nonew == 1) goto b_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
      N = nrow2++ - 1; b->nz++; high2++;\
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp2[ii+1] = rp2[ii]; \
        ap2[ii+1] = ap2[ii]; \
      } \
      rp2[_i] = col;  \
      ap2[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow2; \
}
104 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Overwrites the values of one locally owned row, where v[] holds the
   new values ordered by GLOBAL column number.  The row is split into
   three contiguous pieces: B-entries left of the diagonal block, the
   A (diagonal-block) entries, and B-entries right of the diagonal block.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;   /* convert global row number to local */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    /* l counts B-entries whose global column precedes the diagonal block */
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = a->ignorezeroentries;
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col --;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are either in INSERTMODE or ADDMODE */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash (applying off-process entries),
   assembles the diagonal block, coordinates disassembly across all ranks
   if any rank disassembled, builds the scatter on first final assembly,
   and finally assembles the off-diagonal block.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive and apply all stashed entries destined for this process */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is 0 iff at least one rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build garray, lvec and the scatter context */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row values (MatGetRow workspace) are invalid after assembly */
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal we use separate
502      code so that MatSetValues() is not called for each diagonal allocating
503      new memory, thus calling lots of mallocs and slowing things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added in yy until the next line, */
594     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
600     /* values actually were received in the Begin() but we need to call this nop */
601     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Sets *f to PETSC_TRUE if Bmat equals the transpose of Amat to within
   tol.  First does the cheap local test (diagonal blocks must be mutual
   transposes); only if that passes, and more than one process is
   involved, gathers the off-diagonal columns via MatGetSubMatrices and
   compares them.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  /* notme = all global indices outside this process's owned range.
     NOTE(review): the allocation and ISCreateGeneral use N-last+first
     entries but the second fill loop runs to M; consistent only when
     M == N (square) -- confirm before using on rectangular matrices. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Destroys the MPIAIJ matrix: frees both sequential blocks, the colmap,
   garray, ghost vector and scatter, then removes the composed functions
   registered at creation time.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);  /* PetscFree(NULL) is a no-op */
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  /* unregister the type name and the functions composed in MatCreate_MPIAIJ */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* )th processor needs space a largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
  Viewer backend for MPIAIJ matrices handling ASCII, binary, draw, and
  socket viewers.

  The ASCII "info" formats print only per-process summary statistics and
  return early.  Binary viewers on more than one process are delegated to
  MatView_MPIAIJ_Binary().  All remaining cases (full ASCII dump, draw,
  socket) assemble the whole matrix onto process 0 as a temporary MATMPIAIJ
  and view it through the singleton viewer.

  Collective on mat->comm.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* NOTE(review): a PetscTruth is passed where a PetscInt* output slot is
         expected; only its zero/nonzero state is tested below.  Confirm the
         cast is intentional (pointer written into an enum-sized variable). */
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      /* separate nonzero counts for the diagonal and off-diagonal blocks */
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt   inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factor info on an unfactored parallel matrix */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocess case: the diagonal block IS the whole matrix */
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    /* temporarily shift the stored column indices to global numbering ... */
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    /* ... and restore them to local numbering afterwards */
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols; /* keep the original pointer; cols is advanced in the loop below */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatView_MPIAIJ"
996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997 {
998   PetscErrorCode ierr;
999   PetscTruth     iascii,isdraw,issocket,isbinary;
1000 
1001   PetscFunctionBegin;
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1003   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1005   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1006   if (iascii || isdraw || isbinary || issocket) {
1007     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1008   } else {
1009     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
  SOR/relaxation for an MPIAIJ matrix.  Only the "local" sweep variants
  are supported: each outer iteration scatters the current solution into
  the ghost vector, forms bb1 = bb - B*x (moving the off-process coupling
  to the right hand side), and applies the requested sweep of the
  sequential relaxation to the diagonal block A.

  matin  - the matrix
  bb     - right hand side
  omega  - relaxation parameter
  flag   - one of SOR_LOCAL_{SYMMETRIC,FORWARD,BACKWARD}_SWEEP, optionally
           or'd with SOR_ZERO_INITIAL_GUESS
  fshift - diagonal shift
  its    - number of outer (parallel) iterations; must be positive
  lits   - number of local iterations; must be positive
  xx     - solution vector (output)
*/
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess the first sweep needs no ghost update */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* refresh ghost values of the current iterate */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      /* NOTE(review): this branch passes lits for BOTH the its and lits
         arguments of the sequential relax, while the forward/backward
         branches below pass PETSC_NULL for lits -- confirm the asymmetry
         is intentional */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1086 
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/*
  Permutes the rows and columns of a parallel AIJ matrix: B = P*A*Q
  where P and Q are the permutations described by the index sets rowp
  and colp.

  The row permutation is made collective (gathered across the matrix's
  communicator if needed), inverted globally, and then the locally owned
  slice of the inverted permutation is used as the row selection.  The
  column permutation is handled purely locally.  The permuted matrix is
  produced by MatGetSubMatrix() on the resulting index sets.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
  /* NOTE(review): comparing MPI communicators with == only detects handle
     identity; MPI_Comm_compare() would also recognize congruent
     communicators -- confirm handle equality suffices here */
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
  ierr = ISSetPermutation(growp); CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp); CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices: this process's slice of the inverse */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
  ierr = ISDestroy(irowp); CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
    ierr = ISDestroy(lcolp); CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp); CHKERRQ(ierr);
  ierr = ISDestroy(icolp); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.N;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
/*
  Applies a MatOption to an MPIAIJ matrix.  Most options are simply
  forwarded to both sequential blocks (diagonal A and off-diagonal B);
  symmetry-type options are recorded on A only; a few options are
  handled at this (parallel) level or deliberately ignored.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded unchanged to both local blocks */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  /* orientation is recorded here too, for MatSetValues_MPIAIJ */
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  /* options with no effect on this format */
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  /* handled entirely at the parallel level: skip stashing of off-process values */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  /* symmetry flags are recorded on the diagonal block only */
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  /* negative symmetry flags require no action here */
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1257 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
  Returns one locally owned row of the parallel matrix with global column
  indices, merging the diagonal (A) and off-diagonal (B) parts so the
  result is sorted by increasing global column (assuming A and B store
  their rows sorted).

  matin - the matrix
  row   - global row number; must lie in this process's ownership range
  nz    - number of nonzeros in the row (output)
  idx   - global column indices, or PETSC_NULL if not wanted (output)
  v     - values, or PETSC_NULL if not wanted (output)

  Must be paired with MatRestoreRow_MPIAIJ(); only one row may be
  active at a time (guarded by getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt     max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both the value and index work arrays */
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* only request from A/B what the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies left of the
         diagonal block; those come first, then all of A, then the rest of B */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found by the value pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!aij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1348   }
1349   aij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
  Computes a norm of a parallel AIJ matrix.

  NORM_FROBENIUS - sum of |a_ij|^2 over both local blocks, MPI_SUM
                   reduction, then square root
  NORM_1         - max column sum: per-column absolute sums accumulated
                   into a global-length array and reduced with MPI_SUM
  NORM_INFINITY  - max row sum: local row sums, reduced with MPI_MAX

  The two norm is not supported.  On a single process the work is
  delegated to the sequential MatNorm of the diagonal block.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* tmp holds this process's contribution to every global column sum;
         note this is O(global columns) storage per process */
      PetscReal *tmp,*tmp2;
      PetscInt    *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      /* diagonal block: local column index + cstart gives the global column */
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      /* off-diagonal block: garray maps local to global column */
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap.n; j++) {
        /* row sum combines the A and B parts of local row j */
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1429 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
  Forms the transpose of a parallel AIJ matrix.

  A new matrix B of transposed sizes is preallocated and filled by
  calling MatSetValues() with the roles of rows and columns exchanged
  (each local row of A is inserted as a column of B).  If matout is
  PETSC_NULL the transpose replaces A in place (square matrices only);
  otherwise it is returned in *matout.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
  PetscInt       cstart=A->cmap.rstart,ncol;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
  ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  /* one allocation: d_nnz counts followed by a bi[mb]-sized work area */
  ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  cols = d_nnz + na + 1; /* work space to be used by B part */
  ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ai[ma]; i++){
    d_nnz[aj[i]] ++;
    aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);

  /* copy over the A part */
  /* each local row of A is inserted as one column of B (ncol rows, 1 col) */
  array = Aloc->a;
  row = A->rmap.rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  /* B's local column indices are translated to global via garray into the
     work area; the work area pointer (cols) is consumed as rows are inserted */
  array = Bloc->a;
  row = A->rmap.rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols += ncol;
  }
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: replace A's innards with B's, destroying the old A */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1492 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
  Scales the matrix as diag(ll)*mat*diag(rr).

  The left scaling applies directly to both local blocks since ll is
  distributed like the rows.  The right scaling of the off-diagonal block
  needs the ghost entries of rr, so the scatter into aij->lvec is started
  first and completed only after the diagonal block has been scaled,
  overlapping communication with computation.

  ll - left scaling vector, or PETSC_NULL
  rr - right scaling vector, or PETSC_NULL
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local, no ghosts needed) */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1529 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1530 {
1531   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1532   PetscErrorCode ierr;
1533 
1534   PetscFunctionBegin;
1535   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1536   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1541 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1542 {
1543   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1544   PetscErrorCode ierr;
1545 
1546   PetscFunctionBegin;
1547   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatEqual_MPIAIJ"
1553 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1554 {
1555   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1556   Mat            a,b,c,d;
1557   PetscTruth     flg;
1558   PetscErrorCode ierr;
1559 
1560   PetscFunctionBegin;
1561   a = matA->A; b = matA->B;
1562   c = matB->A; d = matB->B;
1563 
1564   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1565   if (flg) {
1566     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1567   }
1568   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatCopy_MPIAIJ"
1574 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1575 {
1576   PetscErrorCode ierr;
1577   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1578   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1579 
1580   PetscFunctionBegin;
1581   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1582   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1583     /* because of the column compression in the off-processor part of the matrix a->B,
1584        the number of columns in a->B and b->B may be different, hence we cannot call
1585        the MatCopy() directly on the two parts. If need be, we can provide a more
1586        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1587        then copying the submatrices */
1588     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1589   } else {
1590     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1591     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1592   }
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 #undef __FUNCT__
1597 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1598 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1599 {
1600   PetscErrorCode ierr;
1601 
1602   PetscFunctionBegin;
1603   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 #include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   Computes Y = a*X + Y for MPIAIJ matrices.

   SAME_NONZERO_PATTERN:   the value arrays of X and Y line up entry for
                           entry, so one BLAS axpy per block suffices.
   SUBSET_NONZERO_PATTERN: X's nonzeros are a subset of Y's.  The diagonal
                           blocks are handled by MatAXPY_SeqAIJ(); for the
                           off-diagonal blocks an index map xtoy (from X's
                           nonzeros into Y's value array) is built once,
                           cached on Y's off-diagonal block, and reused as
                           long as y->XtoY still refers to the same X block.
   otherwise:              falls back to the generic MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal block: y->a += alpha * x->a over all nz entries */
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    /* off-diagonal block, same treatment */
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* drop a cached map that was built against a different X off-diagonal block */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      /* keep X's off-diagonal block alive while the cached map refers to it */
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    /* scatter-add X's off-diagonal values into Y through the index map */
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1649 
1650 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatConjugate_MPIAIJ"
1654 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1655 {
1656 #if defined(PETSC_USE_COMPLEX)
1657   PetscErrorCode ierr;
1658   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1662   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1663 #else
1664   PetscFunctionBegin;
1665 #endif
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatRealPart_MPIAIJ"
1671 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1672 {
1673   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1674   PetscErrorCode ierr;
1675 
1676   PetscFunctionBegin;
1677   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1678   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1684 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1685 {
1686   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1687   PetscErrorCode ierr;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1691   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #ifdef PETSC_HAVE_PBGL
1696 
1697 #include <boost/parallel/mpi/bsp_process_group.hpp>
1698 #include <boost/graph/distributed/ilu_default_graph.hpp>
1699 #include <boost/graph/distributed/ilu_0_block.hpp>
1700 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1701 #include <boost/graph/distributed/petsc/interface.hpp>
1702 #include <boost/multi_array.hpp>
1703 #include <boost/parallel/distributed_property_map.hpp>
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1707 /*
1708   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1709 */
1710 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1711 {
1712   namespace petsc = boost::distributed::petsc;
1713 
1714   namespace graph_dist = boost::graph::distributed;
1715   using boost::graph::distributed::ilu_default::process_group_type;
1716   using boost::graph::ilu_permuted;
1717 
1718   PetscTruth      row_identity, col_identity;
1719   PetscContainer  c;
1720   PetscInt        m, n, M, N;
1721   PetscErrorCode  ierr;
1722 
1723   PetscFunctionBegin;
1724   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1725   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1726   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1727   if (!row_identity || !col_identity) {
1728     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1729   }
1730 
1731   process_group_type pg;
1732   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1733   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1734   lgraph_type&   level_graph = *lgraph_p;
1735   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1736 
1737   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1738   ilu_permuted(level_graph);
1739 
1740   /* put together the new matrix */
1741   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1742   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1743   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1744   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1745   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1746   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1747   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1748   (*fact)->factor = FACTOR_LU;
1749 
1750   ierr = PetscContainerCreate(A->comm, &c);
1751   ierr = PetscContainerSetPointer(c, lgraph_p);
1752   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1753   PetscFunctionReturn(0);
1754 }
1755 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/*
   Intentionally a no-op.  For the PBGL parallel ILU the numeric work appears
   to be done during MatILUFactorSymbolic_MPIAIJ() (ilu_permuted on the level
   graph) -- NOTE(review): confirm, so nothing is left to compute here.
*/
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatSolve_MPIAIJ"
1766 /*
1767   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1768 */
1769 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1770 {
1771   namespace graph_dist = boost::graph::distributed;
1772 
1773   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1774   lgraph_type*   lgraph_p;
1775   PetscContainer c;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1780   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1781   ierr = VecCopy(b, x); CHKERRQ(ierr);
1782 
1783   PetscScalar* array_x;
1784   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1785   PetscInt sx;
1786   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1787 
1788   PetscScalar* array_b;
1789   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1790   PetscInt sb;
1791   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1792 
1793   lgraph_type&   level_graph = *lgraph_p;
1794   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1795 
1796   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1797   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1798                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1799 
1800   typedef boost::iterator_property_map<array_ref_type::iterator,
1801                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1802   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1803                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1804 
1805   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1806 
1807   PetscFunctionReturn(0);
1808 }
1809 #endif
1810 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;   /* local nonzero count; number of messages sent/received */
  PetscMPIInt    *send_rank;              /* destination ranks; the receive ranks follow at send_rank+size */
  PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;   /* send buffer of nz counts; send/recv buffers of row ptrs + column indices */
  PetscScalar    *sbuf_a,**rbuf_a;        /* send/recv buffers of matrix values */
  PetscErrorCode (*MatDestroy)(Mat);      /* the matrix's original destroy routine, restored by MatDestroy_MatRedundant() */
} Mat_Redundant;
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1821 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1822 {
1823   PetscErrorCode       ierr;
1824   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1825   PetscInt             i;
1826 
1827   PetscFunctionBegin;
1828   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1829   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1830   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1831   for (i=0; i<redund->nrecvs; i++){
1832     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1833     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1834   }
1835   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1836   ierr = PetscFree(redund);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatDestroy_MatRedundant"
1842 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1843 {
1844   PetscErrorCode  ierr;
1845   PetscContainer  container;
1846   Mat_Redundant   *redund=PETSC_NULL;
1847 
1848   PetscFunctionBegin;
1849   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1850   if (container) {
1851     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1852   } else {
1853     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1854   }
1855   A->ops->destroy = redund->MatDestroy;
1856   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1857   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1858   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1859   PetscFunctionReturn(0);
1860 }
1861 
#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
/*
   Builds, on each of nsubcomm subcommunicators, a matrix C containing the
   entries of the whole parallel matrix mat.  Each process packs its local
   rows (row pointers + global column indices into sbuf_j, values into
   sbuf_a) and exchanges them point-to-point with the processes that own the
   same "subrank slot" on the other subcommunicators; leftover processes
   (when size is not divisible by nsubcomm) send to the tail ranks of each
   subcommunicator.  With MAT_REUSE_MATRIX all buffers and message patterns
   are taken from the Mat_Redundant struct cached on C, and only the values
   (sbuf_a) are re-packed and re-sent.
*/
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=mat->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap.range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  PetscScalar    *vals,*aworkA,*aworkB;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    /* sanity-check the reused matrix, then recover buffers and message pattern
       from the Mat_Redundant cached on it */
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); /* NOTE(review): typo, should read "exist" */
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

    nsends    = redund->nsends;
    nrecvs    = redund->nrecvs;
    send_rank = redund->send_rank; recv_rank = send_rank + size;
    sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
    sbuf_j    = redund->sbuf_j;
    sbuf_a    = redund->sbuf_a;
    rbuf_j    = redund->rbuf_j;
    rbuf_a    = redund->rbuf_a;
  }

  if (reuse == MAT_INITIAL_MATRIX){
    PetscMPIInt  subrank,subsize;
    PetscInt     nleftover,np_subcomm;
    /* get the destination processors' id send_rank, nsends and nrecvs */
    ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
    ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); /* NOTE(review): return code not checked -- missing CHKERRQ(ierr) */
    recv_rank = send_rank + size;
    np_subcomm = size/nsubcomm;
    nleftover  = size - nsubcomm*np_subcomm;
    nsends = 0; nrecvs = 0;
    /* exchange with processes occupying the same subrank slot on other subcommunicators */
    for (i=0; i<size; i++){ /* i=rank*/
      if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
        send_rank[nsends] = i; nsends++;
        recv_rank[nrecvs++] = i;
      }
    }
    if (rank >= size - nleftover){/* this proc is a leftover processor */
      i = size-nleftover-1;
      j = 0;
      while (j < nsubcomm - nleftover){
        send_rank[nsends++] = i;
        i--; j++;
      }
    }

    if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
      for (i=0; i<nleftover; i++){
        recv_rank[nrecvs++] = size-nleftover+i;
      }
    }

    /* allocate sbuf_j, sbuf_a */
    /* sbuf_j holds local-row count + 1 row pointers followed by nzlocal column indices */
    i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
    ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
    ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers */
  if (reuse == MAT_INITIAL_MATRIX){
    rownz_max = 0;
    rptr = sbuf_j;
    cols = sbuf_j + rend-rstart + 1;
    vals = sbuf_a;
    rptr[0] = 0;
    /* for each local row: merge the off-diagonal entries left of the diagonal
       block, the diagonal-block entries, then the off-diagonal entries to the
       right, so columns come out in ascending global order */
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncol  = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart){
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++){
        vals[lwrite]   = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend){
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
    /* NOTE(review): the message prints rptr[rend-rstart+1] although the test uses rptr[rend-rstart] */
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr = sbuf_j;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols  = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv other's nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* three rounds of messages (nz counts, indices, values): lay out all
       request arrays in one allocation */
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    /* reuse only resends the values, so just one round of requests is needed */
    ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX){
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
    rbuf_nz = sbuf_nz + nsends;

    /* post receives of other's nzlocal */
    for (i=0; i<nrecvs; i++){
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++){
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      /* record the actual sender; later rounds address it directly */
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from other's */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++){
      j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++){
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++){
      j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++){
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
    ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    /* both preallocations are called; whichever matches C's actual type takes effect */
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++){
    row   = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++){
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    rptr = rbuf_j[imdex];
    cols = rbuf_j[imdex] + rend-rstart + 1;
    vals = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++){
      row   = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
  if (reuse == MAT_INITIAL_MATRIX){
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);

    redund->nzlocal = nzlocal;
    redund->nsends  = nsends;
    redund->nrecvs  = nrecvs;
    redund->send_rank = send_rank;
    redund->sbuf_nz = sbuf_nz;
    redund->sbuf_j  = sbuf_j;
    redund->sbuf_a  = sbuf_a;
    redund->rbuf_j  = rbuf_j;
    redund->rbuf_a  = rbuf_a;

    /* intercept destruction so the buffers above are freed with C */
    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}
2181 
2182 /* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ; entries are positional, matching the
   MatOperation indices noted in the comments (0 = unimplemented). */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       MatLUFactorNumeric_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatILUFactorSymbolic_MPIAIJ,
#else
       0,
#endif
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ 0,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*90*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*95*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*100*/0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*105*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*110*/0,
       MatGetRedundantMatrix_MPIAIJ};
2311 
2312 /* ----------------------------------------------------------------------------------------*/
2313 
2314 EXTERN_C_BEGIN
2315 #undef __FUNCT__
2316 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2317 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2318 {
2319   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2320   PetscErrorCode ierr;
2321 
2322   PetscFunctionBegin;
2323   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2324   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2325   PetscFunctionReturn(0);
2326 }
2327 EXTERN_C_END
2328 
2329 EXTERN_C_BEGIN
2330 #undef __FUNCT__
2331 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2332 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2333 {
2334   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2335   PetscErrorCode ierr;
2336 
2337   PetscFunctionBegin;
2338   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2339   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 EXTERN_C_END
2343 
2344 #include "petscpc.h"
2345 EXTERN_C_BEGIN
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2348 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2349 {
2350   Mat_MPIAIJ     *b;
2351   PetscErrorCode ierr;
2352   PetscInt       i;
2353 
2354   PetscFunctionBegin;
2355   B->preallocated = PETSC_TRUE;
2356   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2357   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2358   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2359   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2360 
2361   B->rmap.bs = B->cmap.bs = 1;
2362   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2363   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2364   if (d_nnz) {
2365     for (i=0; i<B->rmap.n; i++) {
2366       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2367     }
2368   }
2369   if (o_nnz) {
2370     for (i=0; i<B->rmap.n; i++) {
2371       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2372     }
2373   }
2374   b = (Mat_MPIAIJ*)B->data;
2375 
2376   /* Explicitly create 2 MATSEQAIJ matrices. */
2377   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2378   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2379   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2380   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2381   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2382   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2383   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2384   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2385 
2386   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2387   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2388 
2389   PetscFunctionReturn(0);
2390 }
2391 EXTERN_C_END
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2395 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2396 {
2397   Mat            mat;
2398   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2399   PetscErrorCode ierr;
2400 
2401   PetscFunctionBegin;
2402   *newmat       = 0;
2403   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2404   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2405   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2406   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2407   a    = (Mat_MPIAIJ*)mat->data;
2408 
2409   mat->factor       = matin->factor;
2410   mat->rmap.bs      = matin->rmap.bs;
2411   mat->assembled    = PETSC_TRUE;
2412   mat->insertmode   = NOT_SET_VALUES;
2413   mat->preallocated = PETSC_TRUE;
2414 
2415   a->size           = oldmat->size;
2416   a->rank           = oldmat->rank;
2417   a->donotstash     = oldmat->donotstash;
2418   a->roworiented    = oldmat->roworiented;
2419   a->rowindices     = 0;
2420   a->rowvalues      = 0;
2421   a->getrowactive   = PETSC_FALSE;
2422 
2423   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2424   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2425 
2426   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2427   if (oldmat->colmap) {
2428 #if defined (PETSC_USE_CTABLE)
2429     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2430 #else
2431     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2432     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2433     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2434 #endif
2435   } else a->colmap = 0;
2436   if (oldmat->garray) {
2437     PetscInt len;
2438     len  = oldmat->B->cmap.n;
2439     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2440     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2441     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2442   } else a->garray = 0;
2443 
2444   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2445   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2446   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2447   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2448   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2449   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2450   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2451   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2452   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2453   *newmat = mat;
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 #include "petscsys.h"
2458 
2459 #undef __FUNCT__
2460 #define __FUNCT__ "MatLoad_MPIAIJ"
2461 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2462 {
2463   Mat            A;
2464   PetscScalar    *vals,*svals;
2465   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2466   MPI_Status     status;
2467   PetscErrorCode ierr;
2468   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2469   PetscInt       i,nz,j,rstart,rend,mmax;
2470   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2471   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2472   PetscInt       cend,cstart,n,*rowners;
2473   int            fd;
2474 
2475   PetscFunctionBegin;
2476   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2477   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2478   if (!rank) {
2479     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2480     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2481     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2482   }
2483 
2484   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2485   M = header[1]; N = header[2];
2486   /* determine ownership of all rows */
2487   m    = M/size + ((M % size) > rank);
2488   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2489   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2490 
2491   /* First process needs enough room for process with most rows */
2492   if (!rank) {
2493     mmax       = rowners[1];
2494     for (i=2; i<size; i++) {
2495       mmax = PetscMax(mmax,rowners[i]);
2496     }
2497   } else mmax = m;
2498 
2499   rowners[0] = 0;
2500   for (i=2; i<=size; i++) {
2501     rowners[i] += rowners[i-1];
2502   }
2503   rstart = rowners[rank];
2504   rend   = rowners[rank+1];
2505 
2506   /* distribute row lengths to all processors */
2507   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2508   if (!rank) {
2509     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2510     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2511     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2512     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2513     for (j=0; j<m; j++) {
2514       procsnz[0] += ourlens[j];
2515     }
2516     for (i=1; i<size; i++) {
2517       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2518       /* calculate the number of nonzeros on each processor */
2519       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2520         procsnz[i] += rowlengths[j];
2521       }
2522       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2523     }
2524     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2525   } else {
2526     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2527   }
2528 
2529   if (!rank) {
2530     /* determine max buffer needed and allocate it */
2531     maxnz = 0;
2532     for (i=0; i<size; i++) {
2533       maxnz = PetscMax(maxnz,procsnz[i]);
2534     }
2535     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2536 
2537     /* read in my part of the matrix column indices  */
2538     nz   = procsnz[0];
2539     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2540     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2541 
2542     /* read in every one elses and ship off */
2543     for (i=1; i<size; i++) {
2544       nz   = procsnz[i];
2545       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2546       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2547     }
2548     ierr = PetscFree(cols);CHKERRQ(ierr);
2549   } else {
2550     /* determine buffer space needed for message */
2551     nz = 0;
2552     for (i=0; i<m; i++) {
2553       nz += ourlens[i];
2554     }
2555     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2556 
2557     /* receive message of column indices*/
2558     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2559     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2560     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2561   }
2562 
2563   /* determine column ownership if matrix is not square */
2564   if (N != M) {
2565     n      = N/size + ((N % size) > rank);
2566     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2567     cstart = cend - n;
2568   } else {
2569     cstart = rstart;
2570     cend   = rend;
2571     n      = cend - cstart;
2572   }
2573 
2574   /* loop over local rows, determining number of off diagonal entries */
2575   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2576   jj = 0;
2577   for (i=0; i<m; i++) {
2578     for (j=0; j<ourlens[i]; j++) {
2579       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2580       jj++;
2581     }
2582   }
2583 
2584   /* create our matrix */
2585   for (i=0; i<m; i++) {
2586     ourlens[i] -= offlens[i];
2587   }
2588   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2589   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2590   ierr = MatSetType(A,type);CHKERRQ(ierr);
2591   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2592 
2593   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2594   for (i=0; i<m; i++) {
2595     ourlens[i] += offlens[i];
2596   }
2597 
2598   if (!rank) {
2599     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2600 
2601     /* read in my part of the matrix numerical values  */
2602     nz   = procsnz[0];
2603     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2604 
2605     /* insert into matrix */
2606     jj      = rstart;
2607     smycols = mycols;
2608     svals   = vals;
2609     for (i=0; i<m; i++) {
2610       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2611       smycols += ourlens[i];
2612       svals   += ourlens[i];
2613       jj++;
2614     }
2615 
2616     /* read in other processors and ship out */
2617     for (i=1; i<size; i++) {
2618       nz   = procsnz[i];
2619       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2620       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2621     }
2622     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2623   } else {
2624     /* receive numeric values */
2625     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2626 
2627     /* receive message of values*/
2628     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2629     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2630     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2631 
2632     /* insert into matrix */
2633     jj      = rstart;
2634     smycols = mycols;
2635     svals   = vals;
2636     for (i=0; i<m; i++) {
2637       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2638       smycols += ourlens[i];
2639       svals   += ourlens[i];
2640       jj++;
2641     }
2642   }
2643   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2644   ierr = PetscFree(vals);CHKERRQ(ierr);
2645   ierr = PetscFree(mycols);CHKERRQ(ierr);
2646   ierr = PetscFree(rowners);CHKERRQ(ierr);
2647 
2648   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2649   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650   *newmat = A;
2651   PetscFunctionReturn(0);
2652 }
2653 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

    call == MAT_INITIAL_MATRIX builds a new parallel matrix and stashes the
  local SeqAIJ copy on it under the name "SubMatrix" for later reuse;
  call == MAT_REUSE_MATRIX refills *newmat in place using that stashed copy.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = mat->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix saved on the previous call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    /* extract the requested rows/columns as a new local sequential matrix */
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* read the CSR arrays of the sequential copy directly */
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* even split of the n columns, leftovers to the lowest ranks */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;   /* one allocation holds both per-row count arrays */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    /* the reused matrix must match the row layout of the extraction */
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* walk the CSR arrays of the sequential copy, inserting one row at a time */
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2780 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
/*
   Preallocates and fills a parallel AIJ matrix from local CSR arrays
   (Ii, J, v).  Column indices must be sorted within each row: both the
   diagonal/off-diagonal counting below and MAT_COLUMNS_SORTED rely on it.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  /* finalize the layouts so the local sizes/ranges read below are valid */
  B->rmap.bs = B->cmap.bs = 1;
  ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
  m      = B->rmap.n;      /* number of local rows */
  cstart = B->cmap.rstart; /* [cstart,cend) are the "diagonal" block columns */
  cend   = B->cmap.rend;
  rstart = B->rmap.rstart; /* global index of the first local row */

  /* one allocation holds both per-row count arrays */
  ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    /* columns are sorted: first skip the entries left of the diagonal block */
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    /* ...then count the entries inside [cstart,cend) */
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;   /* everything else belongs to the off-diagonal block */
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  /* with no values supplied, insert explicit zeros to set the nonzero pattern */
  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1]- Ii[i];
    /* when v is absent every row reuses the shared zero buffer */
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
2848 
2849 #undef __FUNCT__
2850 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2851 /*@
2852    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2853    (the default parallel PETSc format).
2854 
2855    Collective on MPI_Comm
2856 
2857    Input Parameters:
2858 +  B - the matrix
2859 .  i - the indices into j for the start of each local row (starts with zero)
2860 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2861 -  v - optional values in the matrix
2862 
2863    Level: developer
2864 
2865    Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal
2866      storage format. Thus changing the values in a[] after this call will not effect the matrix values.
2867 
2868 .keywords: matrix, aij, compressed row, sparse, parallel
2869 
2870 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2871           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2872 @*/
2873 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2874 {
2875   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2876 
2877   PetscFunctionBegin;
2878   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2879   if (f) {
2880     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2881   }
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 #undef __FUNCT__
2886 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2887 /*@C
2888    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2889    (the default parallel PETSc format).  For good matrix assembly performance
2890    the user should preallocate the matrix storage by setting the parameters
2891    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2892    performance can be increased by more than a factor of 50.
2893 
2894    Collective on MPI_Comm
2895 
2896    Input Parameters:
2897 +  A - the matrix
2898 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2899            (same value is used for all local rows)
2900 .  d_nnz - array containing the number of nonzeros in the various rows of the
2901            DIAGONAL portion of the local submatrix (possibly different for each row)
2902            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2903            The size of this array is equal to the number of local rows, i.e 'm'.
2904            You must leave room for the diagonal entry even if it is zero.
2905 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2906            submatrix (same value is used for all local rows).
2907 -  o_nnz - array containing the number of nonzeros in the various rows of the
2908            OFF-DIAGONAL portion of the local submatrix (possibly different for
2909            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2910            structure. The size of this array is equal to the number
2911            of local rows, i.e 'm'.
2912 
2913    If the *_nnz parameter is given then the *_nz parameter is ignored
2914 
2915    The AIJ format (also called the Yale sparse matrix format or
2916    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2917    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2918 
2919    The parallel matrix is partitioned such that the first m0 rows belong to
2920    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2921    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2922 
2923    The DIAGONAL portion of the local submatrix of a processor can be defined
2924    as the submatrix which is obtained by extraction the part corresponding
2925    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2926    first row that belongs to the processor, and r2 is the last row belonging
2927    to the this processor. This is a square mxm matrix. The remaining portion
2928    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2929 
2930    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2931 
2932    Example usage:
2933 
2934    Consider the following 8x8 matrix with 34 non-zero values, that is
2935    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2936    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2937    as follows:
2938 
2939 .vb
2940             1  2  0  |  0  3  0  |  0  4
2941     Proc0   0  5  6  |  7  0  0  |  8  0
2942             9  0 10  | 11  0  0  | 12  0
2943     -------------------------------------
2944            13  0 14  | 15 16 17  |  0  0
2945     Proc1   0 18  0  | 19 20 21  |  0  0
2946             0  0  0  | 22 23  0  | 24  0
2947     -------------------------------------
2948     Proc2  25 26 27  |  0  0 28  | 29  0
2949            30  0  0  | 31 32 33  |  0 34
2950 .ve
2951 
2952    This can be represented as a collection of submatrices as:
2953 
2954 .vb
2955       A B C
2956       D E F
2957       G H I
2958 .ve
2959 
2960    Where the submatrices A,B,C are owned by proc0, D,E,F are
2961    owned by proc1, G,H,I are owned by proc2.
2962 
2963    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2964    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2965    The 'M','N' parameters are 8,8, and have the same values on all procs.
2966 
2967    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2968    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2969    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2970    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2971    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2972    matrix, ans [DF] as another SeqAIJ matrix.
2973 
2974    When d_nz, o_nz parameters are specified, d_nz storage elements are
2975    allocated for every row of the local diagonal submatrix, and o_nz
2976    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2977    One way to choose d_nz and o_nz is to use the max nonzerors per local
2978    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2979    In this case, the values of d_nz,o_nz are:
2980 .vb
2981      proc0 : dnz = 2, o_nz = 2
2982      proc1 : dnz = 3, o_nz = 2
2983      proc2 : dnz = 1, o_nz = 4
2984 .ve
2985    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2986    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2987    for proc3. i.e we are using 12+15+10=37 storage locations to store
2988    34 values.
2989 
2990    When d_nnz, o_nnz parameters are specified, the storage is specified
2991    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2992    In the above case the values for d_nnz,o_nnz are:
2993 .vb
2994      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2995      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2996      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2997 .ve
2998    Here the space allocated is sum of all the above values i.e 34, and
2999    hence pre-allocation is perfect.
3000 
3001    Level: intermediate
3002 
3003 .keywords: matrix, aij, compressed row, sparse, parallel
3004 
3005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3006           MPIAIJ
3007 @*/
3008 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3009 {
3010   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3011 
3012   PetscFunctionBegin;
3013   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3014   if (f) {
3015     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3016   }
3017   PetscFunctionReturn(0);
3018 }
3019 
3020 #undef __FUNCT__
3021 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3022 /*@C
3023      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3024          CSR format the local rows.
3025 
3026    Collective on MPI_Comm
3027 
3028    Input Parameters:
3029 +  comm - MPI communicator
3030 .  m - number of local rows (Cannot be PETSC_DECIDE)
3031 .  n - This value should be the same as the local size used in creating the
3032        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3033        calculated if N is given) For square matrices n is almost always m.
3034 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3035 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3036 .   i - row indices
3037 .   j - column indices
3038 -   a - matrix values
3039 
3040    Output Parameter:
3041 .   mat - the matrix
3042 
3043    Level: intermediate
3044 
3045    Notes:
3046        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3047      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3048      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3049 
3050        The i and j indices are 0 based
3051 
3052 .keywords: matrix, aij, compressed row, sparse, parallel
3053 
3054 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3055           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3056 @*/
3057 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3058 {
3059   PetscErrorCode ierr;
3060 
3061  PetscFunctionBegin;
3062   if (i[0]) {
3063     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3064   }
3065   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3066   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3067   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3068   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3069   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 #undef __FUNCT__
3074 #define __FUNCT__ "MatCreateMPIAIJ"
3075 /*@C
3076    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3077    (the default parallel PETSc format).  For good matrix assembly performance
3078    the user should preallocate the matrix storage by setting the parameters
3079    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3080    performance can be increased by more than a factor of 50.
3081 
3082    Collective on MPI_Comm
3083 
3084    Input Parameters:
3085 +  comm - MPI communicator
3086 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3087            This value should be the same as the local size used in creating the
3088            y vector for the matrix-vector product y = Ax.
3089 .  n - This value should be the same as the local size used in creating the
3090        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3091        calculated if N is given) For square matrices n is almost always m.
3092 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3093 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3094 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3095            (same value is used for all local rows)
3096 .  d_nnz - array containing the number of nonzeros in the various rows of the
3097            DIAGONAL portion of the local submatrix (possibly different for each row)
3098            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3099            The size of this array is equal to the number of local rows, i.e 'm'.
3100            You must leave room for the diagonal entry even if it is zero.
3101 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3102            submatrix (same value is used for all local rows).
3103 -  o_nnz - array containing the number of nonzeros in the various rows of the
3104            OFF-DIAGONAL portion of the local submatrix (possibly different for
3105            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3106            structure. The size of this array is equal to the number
3107            of local rows, i.e 'm'.
3108 
3109    Output Parameter:
3110 .  A - the matrix
3111 
3112    Notes:
3113    If the *_nnz parameter is given then the *_nz parameter is ignored
3114 
3115    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3116    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3117    storage requirements for this matrix.
3118 
3119    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3120    processor than it must be used on all processors that share the object for
3121    that argument.
3122 
3123    The user MUST specify either the local or global matrix dimensions
3124    (possibly both).
3125 
3126    The parallel matrix is partitioned across processors such that the
3127    first m0 rows belong to process 0, the next m1 rows belong to
3128    process 1, the next m2 rows belong to process 2 etc.. where
3129    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3130    values corresponding to [m x N] submatrix.
3131 
3132    The columns are logically partitioned with the n0 columns belonging
3133    to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.
3135 
3136    The DIAGONAL portion of the local submatrix on any given processor
3137    is the submatrix corresponding to the rows and columns m,n
3138    corresponding to the given processor. i.e diagonal matrix on
3139    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3140    etc. The remaining portion of the local submatrix [m x (N-n)]
3141    constitute the OFF-DIAGONAL portion. The example below better
3142    illustrates this concept.
3143 
3144    For a square global matrix we define each processor's diagonal portion
3145    to be its local rows and the corresponding columns (a square submatrix);
3146    each processor's off-diagonal portion encompasses the remainder of the
3147    local matrix (a rectangular submatrix).
3148 
3149    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3150 
3151    When calling this routine with a single process communicator, a matrix of
3152    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3153    type of communicator, use the construction mechanism:
3154      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3155 
3156    By default, this format uses inodes (identical nodes) when possible.
3157    We search for consecutive rows with the same nonzero structure, thereby
3158    reusing matrix information to achieve increased efficiency.
3159 
3160    Options Database Keys:
3161 +  -mat_no_inode  - Do not use inodes
3162 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3163 -  -mat_aij_oneindex - Internally use indexing starting at 1
3164         rather than 0.  Note that when calling MatSetValues(),
3165         the user still MUST index entries starting at 0!
3166 
3167 
3168    Example usage:
3169 
3170    Consider the following 8x8 matrix with 34 non-zero values, that is
3171    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3172    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3173    as follows:
3174 
3175 .vb
3176             1  2  0  |  0  3  0  |  0  4
3177     Proc0   0  5  6  |  7  0  0  |  8  0
3178             9  0 10  | 11  0  0  | 12  0
3179     -------------------------------------
3180            13  0 14  | 15 16 17  |  0  0
3181     Proc1   0 18  0  | 19 20 21  |  0  0
3182             0  0  0  | 22 23  0  | 24  0
3183     -------------------------------------
3184     Proc2  25 26 27  |  0  0 28  | 29  0
3185            30  0  0  | 31 32 33  |  0 34
3186 .ve
3187 
3188    This can be represented as a collection of submatrices as:
3189 
3190 .vb
3191       A B C
3192       D E F
3193       G H I
3194 .ve
3195 
3196    Where the submatrices A,B,C are owned by proc0, D,E,F are
3197    owned by proc1, G,H,I are owned by proc2.
3198 
3199    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3200    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3201    The 'M','N' parameters are 8,8, and have the same values on all procs.
3202 
3203    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3204    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3205    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3206    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3207    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3209 
3210    When d_nz, o_nz parameters are specified, d_nz storage elements are
3211    allocated for every row of the local diagonal submatrix, and o_nz
3212    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3214    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3215    In this case, the values of d_nz,o_nz are:
3216 .vb
3217      proc0 : dnz = 2, o_nz = 2
3218      proc1 : dnz = 3, o_nz = 2
3219      proc2 : dnz = 1, o_nz = 4
3220 .ve
3221    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3222    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
3224    34 values.
3225 
3226    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3228    In the above case the values for d_nnz,o_nnz are:
3229 .vb
3230      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3231      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3232      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3233 .ve
3234    Here the space allocated is sum of all the above values i.e 34, and
3235    hence pre-allocation is perfect.
3236 
3237    Level: intermediate
3238 
3239 .keywords: matrix, aij, compressed row, sparse, parallel
3240 
3241 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3242           MPIAIJ, MatCreateMPIAIJWithArrays()
3243 @*/
3244 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3245 {
3246   PetscErrorCode ierr;
3247   PetscMPIInt    size;
3248 
3249   PetscFunctionBegin;
3250   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3251   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3252   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3253   if (size > 1) {
3254     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3255     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3256   } else {
3257     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3258     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3259   }
3260   PetscFunctionReturn(0);
3261 }
3262 
3263 #undef __FUNCT__
3264 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3265 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3266 {
3267   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3268 
3269   PetscFunctionBegin;
3270   *Ad     = a->A;
3271   *Ao     = a->B;
3272   *colmap = a->garray;
3273   PetscFunctionReturn(0);
3274 }
3275 
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Attaches a coloring to both sequential blocks
   (diagonal a->A and off-diagonal a->B) of an MPIAIJ matrix.

   Input Parameters:
+  A        - the MPIAIJ matrix
-  coloring - the coloring; only IS_COLORING_GLOBAL and IS_COLORING_GHOSTED
              are handled, any other type raises PETSC_ERR_SUP
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather every processor's colors
       so columns of B (globally numbered through garray) can be translated */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* build a sequential coloring for B and hand it over; ocoloring is
       reference-counted, so destroy our reference immediately afterwards */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map the owned global rows through
       A->mapping to index into the ghosted color array */
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      larray[i] = i + A->cmap.rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: garray already holds the global
       column numbers of B, so map those directly */
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}
3339 
3340 #if defined(PETSC_HAVE_ADIC)
3341 #undef __FUNCT__
3342 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3343 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3344 {
3345   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3346   PetscErrorCode ierr;
3347 
3348   PetscFunctionBegin;
3349   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3350   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3351   PetscFunctionReturn(0);
3352 }
3353 #endif
3354 
3355 #undef __FUNCT__
3356 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3357 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3358 {
3359   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3360   PetscErrorCode ierr;
3361 
3362   PetscFunctionBegin;
3363   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3364   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3365   PetscFunctionReturn(0);
3366 }
3367 
3368 #undef __FUNCT__
3369 #define __FUNCT__ "MatMerge"
3370 /*@C
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
3372                  matrices from each processor
3373 
3374     Collective on MPI_Comm
3375 
3376    Input Parameters:
3377 +    comm - the communicators the parallel matrix will live on
.    inmat - the input sequential matrix (destroyed by this routine once its values have been copied)
3379 .    n - number of local columns (or PETSC_DECIDE)
3380 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3381 
3382    Output Parameter:
3383 .    outmat - the parallel matrix generated
3384 
3385     Level: advanced
3386 
3387    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3388 
3389 @*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  /* inmat is sequential, so its global sizes are also this process's local sizes */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* prefix sum of the local row counts gives this process's first global row */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* NOTE: MatPreallocateInitialize/Finalize are macros that allocate and
       free the dnz/onz arrays and auxiliary ownership variables */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    /* reuse path: the output matrix already exists, just recover the row offset */
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* copy the local rows into the parallel matrix at their global positions */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii    = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* the input sequential matrix is consumed: destroyed here after its values
     have been copied into *outmat */
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
3438 
3439 #undef __FUNCT__
3440 #define __FUNCT__ "MatFileSplit"
3441 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3442 {
3443   PetscErrorCode    ierr;
3444   PetscMPIInt       rank;
3445   PetscInt          m,N,i,rstart,nnz;
3446   size_t            len;
3447   const PetscInt    *indx;
3448   PetscViewer       out;
3449   char              *name;
3450   Mat               B;
3451   const PetscScalar *values;
3452 
3453   PetscFunctionBegin;
3454   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3455   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3456   /* Should this be the type of the diagonal block of A? */
3457   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3458   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3459   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3460   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3461   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3462   for (i=0;i<m;i++) {
3463     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3464     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3465     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3466   }
3467   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3468   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3469 
3470   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3471   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3472   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3473   sprintf(name,"%s.%d",outfile,rank);
3474   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3475   ierr = PetscFree(name);
3476   ierr = MatView(B,out);CHKERRQ(ierr);
3477   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3478   ierr = MatDestroy(B);CHKERRQ(ierr);
3479   PetscFunctionReturn(0);
3480 }
3481 
3482 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3483 #undef __FUNCT__
3484 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3485 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3486 {
3487   PetscErrorCode       ierr;
3488   Mat_Merge_SeqsToMPI  *merge;
3489   PetscContainer       container;
3490 
3491   PetscFunctionBegin;
3492   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3493   if (container) {
3494     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3495     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3496     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3497     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3498     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3499     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3500     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3501     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3502     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3503     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3504     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3505     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3506 
3507     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3508     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3509   }
3510   ierr = PetscFree(merge);CHKERRQ(ierr);
3511 
3512   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3513   PetscFunctionReturn(0);
3514 }
3515 
3516 #include "src/mat/utils/freespace.h"
3517 #include "petscbt.h"
3518 static PetscEvent logkey_seqstompinum = 0;
3519 #undef __FUNCT__
3520 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3521 /*@C
3522       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3523                  matrices from each processor
3524 
3525     Collective on MPI_Comm
3526 
   Input Parameters:
+    seqmat - the input sequential matrix holding this process's contribution
-    mpimat - the parallel matrix created by MatMerge_SeqsToMPISymbolic()

   Output Parameter:
.    mpimat - the parallel matrix with its numerical values filled in
3536 
3537     Level: advanced
3538 
3539    Notes:
3540      The dimensions of the sequential matrix in each processor MUST be the same.
3541      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3542      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3543 @*/
3544 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3545 {
3546   PetscErrorCode       ierr;
3547   MPI_Comm             comm=mpimat->comm;
3548   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3549   PetscMPIInt          size,rank,taga,*len_s;
3550   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3551   PetscInt             proc,m;
3552   PetscInt             **buf_ri,**buf_rj;
3553   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3554   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3555   MPI_Request          *s_waits,*r_waits;
3556   MPI_Status           *status;
3557   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3558   Mat_Merge_SeqsToMPI  *merge;
3559   PetscContainer       container;
3560 
3561   PetscFunctionBegin;
3562   if (!logkey_seqstompinum) {
3563     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3564   }
3565   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3566 
3567   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3568   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3569 
3570   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3571   if (container) {
3572     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3573   }
3574   bi     = merge->bi;
3575   bj     = merge->bj;
3576   buf_ri = merge->buf_ri;
3577   buf_rj = merge->buf_rj;
3578 
3579   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3580   owners = merge->rowmap.range;
3581   len_s  = merge->len_s;
3582 
3583   /* send and recv matrix values */
3584   /*-----------------------------*/
3585   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3586   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3587 
3588   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3589   for (proc=0,k=0; proc<size; proc++){
3590     if (!len_s[proc]) continue;
3591     i = owners[proc];
3592     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3593     k++;
3594   }
3595 
3596   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3597   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3598   ierr = PetscFree(status);CHKERRQ(ierr);
3599 
3600   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3601   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3602 
3603   /* insert mat values of mpimat */
3604   /*----------------------------*/
3605   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3606   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3607   nextrow = buf_ri_k + merge->nrecv;
3608   nextai  = nextrow + merge->nrecv;
3609 
3610   for (k=0; k<merge->nrecv; k++){
3611     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3612     nrows = *(buf_ri_k[k]);
3613     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3614     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3615   }
3616 
3617   /* set values of ba */
3618   m = merge->rowmap.n;
3619   for (i=0; i<m; i++) {
3620     arow = owners[rank] + i;
3621     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3622     bnzi = bi[i+1] - bi[i];
3623     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3624 
3625     /* add local non-zero vals of this proc's seqmat into ba */
3626     anzi = ai[arow+1] - ai[arow];
3627     aj   = a->j + ai[arow];
3628     aa   = a->a + ai[arow];
3629     nextaj = 0;
3630     for (j=0; nextaj<anzi; j++){
3631       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3632         ba_i[j] += aa[nextaj++];
3633       }
3634     }
3635 
3636     /* add received vals into ba */
3637     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3638       /* i-th row */
3639       if (i == *nextrow[k]) {
3640         anzi = *(nextai[k]+1) - *nextai[k];
3641         aj   = buf_rj[k] + *(nextai[k]);
3642         aa   = abuf_r[k] + *(nextai[k]);
3643         nextaj = 0;
3644         for (j=0; nextaj<anzi; j++){
3645           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3646             ba_i[j] += aa[nextaj++];
3647           }
3648         }
3649         nextrow[k]++; nextai[k]++;
3650       }
3651     }
3652     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3653   }
3654   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3655   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3656 
3657   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3658   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3659   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3660   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3661   PetscFunctionReturn(0);
3662 }
3663 
3664 static PetscEvent logkey_seqstompisym = 0;
3665 #undef __FUNCT__
3666 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3667 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3668 {
3669   PetscErrorCode       ierr;
3670   Mat                  B_mpi;
3671   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3672   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3673   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3674   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3675   PetscInt             len,proc,*dnz,*onz;
3676   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3677   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3678   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3679   MPI_Status           *status;
3680   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3681   PetscBT              lnkbt;
3682   Mat_Merge_SeqsToMPI  *merge;
3683   PetscContainer       container;
3684 
3685   PetscFunctionBegin;
3686   if (!logkey_seqstompisym) {
3687     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3688   }
3689   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3690 
3691   /* make sure it is a PETSc comm */
3692   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3693   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3694   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3695 
3696   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3697   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3698 
3699   /* determine row ownership */
3700   /*---------------------------------------------------------*/
3701   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3702   merge->rowmap.n = m;
3703   merge->rowmap.N = M;
3704   merge->rowmap.bs = 1;
3705   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3706   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3707   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3708 
3709   m      = merge->rowmap.n;
3710   M      = merge->rowmap.N;
3711   owners = merge->rowmap.range;
3712 
3713   /* determine the number of messages to send, their lengths */
3714   /*---------------------------------------------------------*/
3715   len_s  = merge->len_s;
3716 
3717   len = 0;  /* length of buf_si[] */
3718   merge->nsend = 0;
3719   for (proc=0; proc<size; proc++){
3720     len_si[proc] = 0;
3721     if (proc == rank){
3722       len_s[proc] = 0;
3723     } else {
3724       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3725       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3726     }
3727     if (len_s[proc]) {
3728       merge->nsend++;
3729       nrows = 0;
3730       for (i=owners[proc]; i<owners[proc+1]; i++){
3731         if (ai[i+1] > ai[i]) nrows++;
3732       }
3733       len_si[proc] = 2*(nrows+1);
3734       len += len_si[proc];
3735     }
3736   }
3737 
3738   /* determine the number and length of messages to receive for ij-structure */
3739   /*-------------------------------------------------------------------------*/
3740   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3741   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3742 
3743   /* post the Irecv of j-structure */
3744   /*-------------------------------*/
3745   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3746   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3747 
3748   /* post the Isend of j-structure */
3749   /*--------------------------------*/
3750   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3751   sj_waits = si_waits + merge->nsend;
3752 
3753   for (proc=0, k=0; proc<size; proc++){
3754     if (!len_s[proc]) continue;
3755     i = owners[proc];
3756     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3757     k++;
3758   }
3759 
3760   /* receives and sends of j-structure are complete */
3761   /*------------------------------------------------*/
3762   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3763   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3764 
3765   /* send and recv i-structure */
3766   /*---------------------------*/
3767   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3768   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3769 
3770   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3771   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3772   for (proc=0,k=0; proc<size; proc++){
3773     if (!len_s[proc]) continue;
3774     /* form outgoing message for i-structure:
3775          buf_si[0]:                 nrows to be sent
3776                [1:nrows]:           row index (global)
3777                [nrows+1:2*nrows+1]: i-structure index
3778     */
3779     /*-------------------------------------------*/
3780     nrows = len_si[proc]/2 - 1;
3781     buf_si_i    = buf_si + nrows+1;
3782     buf_si[0]   = nrows;
3783     buf_si_i[0] = 0;
3784     nrows = 0;
3785     for (i=owners[proc]; i<owners[proc+1]; i++){
3786       anzi = ai[i+1] - ai[i];
3787       if (anzi) {
3788         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3789         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3790         nrows++;
3791       }
3792     }
3793     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3794     k++;
3795     buf_si += len_si[proc];
3796   }
3797 
3798   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3799   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3800 
3801   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3802   for (i=0; i<merge->nrecv; i++){
3803     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3804   }
3805 
3806   ierr = PetscFree(len_si);CHKERRQ(ierr);
3807   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3808   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3809   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3810   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3811   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3812   ierr = PetscFree(status);CHKERRQ(ierr);
3813 
3814   /* compute a local seq matrix in each processor */
3815   /*----------------------------------------------*/
3816   /* allocate bi array and free space for accumulating nonzero column info */
3817   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3818   bi[0] = 0;
3819 
3820   /* create and initialize a linked list */
3821   nlnk = N+1;
3822   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3823 
3824   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3825   len = 0;
3826   len  = ai[owners[rank+1]] - ai[owners[rank]];
3827   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3828   current_space = free_space;
3829 
3830   /* determine symbolic info for each local row */
3831   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3832   nextrow = buf_ri_k + merge->nrecv;
3833   nextai  = nextrow + merge->nrecv;
3834   for (k=0; k<merge->nrecv; k++){
3835     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3836     nrows = *buf_ri_k[k];
3837     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3838     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3839   }
3840 
3841   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3842   len = 0;
3843   for (i=0;i<m;i++) {
3844     bnzi   = 0;
3845     /* add local non-zero cols of this proc's seqmat into lnk */
3846     arow   = owners[rank] + i;
3847     anzi   = ai[arow+1] - ai[arow];
3848     aj     = a->j + ai[arow];
3849     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3850     bnzi += nlnk;
3851     /* add received col data into lnk */
3852     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3853       if (i == *nextrow[k]) { /* i-th row */
3854         anzi = *(nextai[k]+1) - *nextai[k];
3855         aj   = buf_rj[k] + *nextai[k];
3856         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3857         bnzi += nlnk;
3858         nextrow[k]++; nextai[k]++;
3859       }
3860     }
3861     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3862 
3863     /* if free space is not available, make more free space */
3864     if (current_space->local_remaining<bnzi) {
3865       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3866       nspacedouble++;
3867     }
3868     /* copy data into free space, then initialize lnk */
3869     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3870     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3871 
3872     current_space->array           += bnzi;
3873     current_space->local_used      += bnzi;
3874     current_space->local_remaining -= bnzi;
3875 
3876     bi[i+1] = bi[i] + bnzi;
3877   }
3878 
3879   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3880 
3881   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3882   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3883   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3884 
3885   /* create symbolic parallel matrix B_mpi */
3886   /*---------------------------------------*/
3887   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3888   if (n==PETSC_DECIDE) {
3889     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3890   } else {
3891     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3892   }
3893   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3894   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3895   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3896 
3897   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3898   B_mpi->assembled     = PETSC_FALSE;
3899   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3900   merge->bi            = bi;
3901   merge->bj            = bj;
3902   merge->buf_ri        = buf_ri;
3903   merge->buf_rj        = buf_rj;
3904   merge->coi           = PETSC_NULL;
3905   merge->coj           = PETSC_NULL;
3906   merge->owners_co     = PETSC_NULL;
3907 
3908   /* attach the supporting struct to B_mpi for reuse */
3909   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3910   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3911   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3912   *mpimat = B_mpi;
3913 
3914   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3915   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3916   PetscFunctionReturn(0);
3917 }
3918 
3919 static PetscEvent logkey_seqstompi = 0;
3920 #undef __FUNCT__
3921 #define __FUNCT__ "MatMerge_SeqsToMPI"
3922 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3923 {
3924   PetscErrorCode   ierr;
3925 
3926   PetscFunctionBegin;
3927   if (!logkey_seqstompi) {
3928     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3929   }
3930   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3931   if (scall == MAT_INITIAL_MATRIX){
3932     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3933   }
3934   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3935   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3936   PetscFunctionReturn(0);
3937 }
3938 static PetscEvent logkey_getlocalmat = 0;
3939 #undef __FUNCT__
3940 #define __FUNCT__ "MatGetLocalMat"
3941 /*@C
3942      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3943 
3944     Not Collective
3945 
3946    Input Parameters:
3947 +    A - the matrix
3948 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3949 
3950    Output Parameter:
3951 .    A_loc - the local sequential matrix generated
3952 
3953     Level: developer
3954 
3955 @*/
3956 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3957 {
3958   PetscErrorCode  ierr;
3959   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3960   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3961   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3962   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3963   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3964   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3965 
3966   PetscFunctionBegin;
3967   if (!logkey_getlocalmat) {
3968     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3969   }
3970   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3971   if (scall == MAT_INITIAL_MATRIX){
3972     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3973     ci[0] = 0;
3974     for (i=0; i<am; i++){
3975       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3976     }
3977     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3978     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3979     k = 0;
3980     for (i=0; i<am; i++) {
3981       ncols_o = bi[i+1] - bi[i];
3982       ncols_d = ai[i+1] - ai[i];
3983       /* off-diagonal portion of A */
3984       for (jo=0; jo<ncols_o; jo++) {
3985         col = cmap[*bj];
3986         if (col >= cstart) break;
3987         cj[k]   = col; bj++;
3988         ca[k++] = *ba++;
3989       }
3990       /* diagonal portion of A */
3991       for (j=0; j<ncols_d; j++) {
3992         cj[k]   = cstart + *aj++;
3993         ca[k++] = *aa++;
3994       }
3995       /* off-diagonal portion of A */
3996       for (j=jo; j<ncols_o; j++) {
3997         cj[k]   = cmap[*bj++];
3998         ca[k++] = *ba++;
3999       }
4000     }
4001     /* put together the new matrix */
4002     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4003     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4004     /* Since these are PETSc arrays, change flags to free them as necessary. */
4005     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4006     mat->free_a  = PETSC_TRUE;
4007     mat->free_ij = PETSC_TRUE;
4008     mat->nonew   = 0;
4009   } else if (scall == MAT_REUSE_MATRIX){
4010     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4011     ci = mat->i; cj = mat->j; ca = mat->a;
4012     for (i=0; i<am; i++) {
4013       /* off-diagonal portion of A */
4014       ncols_o = bi[i+1] - bi[i];
4015       for (jo=0; jo<ncols_o; jo++) {
4016         col = cmap[*bj];
4017         if (col >= cstart) break;
4018         *ca++ = *ba++; bj++;
4019       }
4020       /* diagonal portion of A */
4021       ncols_d = ai[i+1] - ai[i];
4022       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4023       /* off-diagonal portion of A */
4024       for (j=jo; j<ncols_o; j++) {
4025         *ca++ = *ba++; bj++;
4026       }
4027     }
4028   } else {
4029     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4030   }
4031 
4032   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4033   PetscFunctionReturn(0);
4034 }
4035 
4036 static PetscEvent logkey_getlocalmatcondensed = 0;
4037 #undef __FUNCT__
4038 #define __FUNCT__ "MatGetLocalMatCondensed"
4039 /*@C
4040      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4041 
4042     Not Collective
4043 
4044    Input Parameters:
4045 +    A - the matrix
4046 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4047 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4048 
4049    Output Parameter:
4050 .    A_loc - the local sequential matrix generated
4051 
4052     Level: developer
4053 
4054 @*/
4055 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4056 {
4057   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4058   PetscErrorCode    ierr;
4059   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4060   IS                isrowa,iscola;
4061   Mat               *aloc;
4062 
4063   PetscFunctionBegin;
4064   if (!logkey_getlocalmatcondensed) {
4065     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
4066   }
4067   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4068   if (!row){
4069     start = A->rmap.rstart; end = A->rmap.rend;
4070     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4071   } else {
4072     isrowa = *row;
4073   }
4074   if (!col){
4075     start = A->cmap.rstart;
4076     cmap  = a->garray;
4077     nzA   = a->A->cmap.n;
4078     nzB   = a->B->cmap.n;
4079     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4080     ncols = 0;
4081     for (i=0; i<nzB; i++) {
4082       if (cmap[i] < start) idx[ncols++] = cmap[i];
4083       else break;
4084     }
4085     imark = i;
4086     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4087     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4088     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4089     ierr = PetscFree(idx);CHKERRQ(ierr);
4090   } else {
4091     iscola = *col;
4092   }
4093   if (scall != MAT_INITIAL_MATRIX){
4094     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4095     aloc[0] = *A_loc;
4096   }
4097   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4098   *A_loc = aloc[0];
4099   ierr = PetscFree(aloc);CHKERRQ(ierr);
4100   if (!row){
4101     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4102   }
4103   if (!col){
4104     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4105   }
4106   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4107   PetscFunctionReturn(0);
4108 }
4109 
4110 static PetscEvent logkey_GetBrowsOfAcols = 0;
4111 #undef __FUNCT__
4112 #define __FUNCT__ "MatGetBrowsOfAcols"
4113 /*@C
4114     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4115 
4116     Collective on Mat
4117 
4118    Input Parameters:
4119 +    A,B - the matrices in mpiaij format
4120 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4121 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4122 
4123    Output Parameter:
4124 +    rowb, colb - index sets of rows and columns of B to extract
4125 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
4126 -    B_seq - the sequential matrix generated
4127 
4128     Level: developer
4129 
4130 @*/
4131 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4132 {
4133   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4134   PetscErrorCode    ierr;
4135   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4136   IS                isrowb,iscolb;
4137   Mat               *bseq;
4138 
4139   PetscFunctionBegin;
4140   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4141     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4142   }
4143   if (!logkey_GetBrowsOfAcols) {
4144     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
4145   }
4146   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4147 
4148   if (scall == MAT_INITIAL_MATRIX){
4149     start = A->cmap.rstart;
4150     cmap  = a->garray;
4151     nzA   = a->A->cmap.n;
4152     nzB   = a->B->cmap.n;
4153     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4154     ncols = 0;
4155     for (i=0; i<nzB; i++) {  /* row < local row index */
4156       if (cmap[i] < start) idx[ncols++] = cmap[i];
4157       else break;
4158     }
4159     imark = i;
4160     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4161     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4162     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4163     ierr = PetscFree(idx);CHKERRQ(ierr);
4164     *brstart = imark;
4165     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4166   } else {
4167     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4168     isrowb = *rowb; iscolb = *colb;
4169     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4170     bseq[0] = *B_seq;
4171   }
4172   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4173   *B_seq = bseq[0];
4174   ierr = PetscFree(bseq);CHKERRQ(ierr);
4175   if (!rowb){
4176     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4177   } else {
4178     *rowb = isrowb;
4179   }
4180   if (!colb){
4181     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4182   } else {
4183     *colb = iscolb;
4184   }
4185   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4186   PetscFunctionReturn(0);
4187 }
4188 
4189 static PetscEvent logkey_GetBrowsOfAocols = 0;
4190 #undef __FUNCT__
4191 #define __FUNCT__ "MatGetBrowsOfAoCols"
4192 /*@C
4193     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4194     of the OFF-DIAGONAL portion of local A
4195 
4196     Collective on Mat
4197 
4198    Input Parameters:
4199 +    A,B - the matrices in mpiaij format
4200 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4201 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4202 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4203 
4204    Output Parameter:
4205 +    B_oth - the sequential matrix generated
4206 
4207     Level: developer
4208 
4209 @*/
4210 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4211 {
4212   VecScatter_MPI_General *gen_to,*gen_from;
4213   PetscErrorCode         ierr;
4214   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4215   Mat_SeqAIJ             *b_oth;
4216   VecScatter             ctx=a->Mvctx;
4217   MPI_Comm               comm=ctx->comm;
4218   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4219   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4220   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4221   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4222   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4223   MPI_Status             *sstatus,rstatus;
4224   PetscMPIInt            jj;
4225   PetscInt               *cols,sbs,rbs;
4226   PetscScalar            *vals;
4227 
4228   PetscFunctionBegin;
4229   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4230     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4231   }
4232   if (!logkey_GetBrowsOfAocols) {
4233     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
4234   }
4235   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4236   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4237 
4238   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4239   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4240   rvalues  = gen_from->values; /* holds the length of receiving row */
4241   svalues  = gen_to->values;   /* holds the length of sending row */
4242   nrecvs   = gen_from->n;
4243   nsends   = gen_to->n;
4244 
4245   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4246   srow     = gen_to->indices;   /* local row index to be sent */
4247   sstarts  = gen_to->starts;
4248   sprocs   = gen_to->procs;
4249   sstatus  = gen_to->sstatus;
4250   sbs      = gen_to->bs;
4251   rstarts  = gen_from->starts;
4252   rprocs   = gen_from->procs;
4253   rbs      = gen_from->bs;
4254 
4255   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4256   if (scall == MAT_INITIAL_MATRIX){
4257     /* i-array */
4258     /*---------*/
4259     /*  post receives */
4260     for (i=0; i<nrecvs; i++){
4261       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4262       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4263       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4264     }
4265 
4266     /* pack the outgoing message */
4267     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4268     rstartsj = sstartsj + nsends +1;
4269     sstartsj[0] = 0;  rstartsj[0] = 0;
4270     len = 0; /* total length of j or a array to be sent */
4271     k = 0;
4272     for (i=0; i<nsends; i++){
4273       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4274       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4275       for (j=0; j<nrows; j++) {
4276         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4277         for (l=0; l<sbs; l++){
4278           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4279           rowlen[j*sbs+l] = ncols;
4280           len += ncols;
4281           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4282         }
4283         k++;
4284       }
4285       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4286       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4287     }
4288     /* recvs and sends of i-array are completed */
4289     i = nrecvs;
4290     while (i--) {
4291       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4292     }
4293     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4294 
4295     /* allocate buffers for sending j and a arrays */
4296     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4297     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4298 
4299     /* create i-array of B_oth */
4300     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4301     b_othi[0] = 0;
4302     len = 0; /* total length of j or a array to be received */
4303     k = 0;
4304     for (i=0; i<nrecvs; i++){
4305       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4306       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4307       for (j=0; j<nrows; j++) {
4308         b_othi[k+1] = b_othi[k] + rowlen[j];
4309         len += rowlen[j]; k++;
4310       }
4311       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4312     }
4313 
4314     /* allocate space for j and a arrrays of B_oth */
4315     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4316     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4317 
4318     /* j-array */
4319     /*---------*/
4320     /*  post receives of j-array */
4321     for (i=0; i<nrecvs; i++){
4322       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4323       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4324     }
4325 
4326     /* pack the outgoing message j-array */
4327     k = 0;
4328     for (i=0; i<nsends; i++){
4329       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4330       bufJ = bufj+sstartsj[i];
4331       for (j=0; j<nrows; j++) {
4332         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4333         for (ll=0; ll<sbs; ll++){
4334           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4335           for (l=0; l<ncols; l++){
4336             *bufJ++ = cols[l];
4337           }
4338           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4339         }
4340       }
4341       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4342     }
4343 
4344     /* recvs and sends of j-array are completed */
4345     i = nrecvs;
4346     while (i--) {
4347       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4348     }
4349     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4350   } else if (scall == MAT_REUSE_MATRIX){
4351     sstartsj = *startsj;
4352     rstartsj = sstartsj + nsends +1;
4353     bufa     = *bufa_ptr;
4354     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4355     b_otha   = b_oth->a;
4356   } else {
4357     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4358   }
4359 
4360   /* a-array */
4361   /*---------*/
4362   /*  post receives of a-array */
4363   for (i=0; i<nrecvs; i++){
4364     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4365     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4366   }
4367 
4368   /* pack the outgoing message a-array */
4369   k = 0;
4370   for (i=0; i<nsends; i++){
4371     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4372     bufA = bufa+sstartsj[i];
4373     for (j=0; j<nrows; j++) {
4374       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4375       for (ll=0; ll<sbs; ll++){
4376         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4377         for (l=0; l<ncols; l++){
4378           *bufA++ = vals[l];
4379         }
4380         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4381       }
4382     }
4383     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4384   }
4385   /* recvs and sends of a-array are completed */
4386   i = nrecvs;
4387   while (i--) {
4388     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4389   }
4390   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4391   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4392 
4393   if (scall == MAT_INITIAL_MATRIX){
4394     /* put together the new matrix */
4395     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4396 
4397     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4398     /* Since these are PETSc arrays, change flags to free them as necessary. */
4399     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4400     b_oth->free_a  = PETSC_TRUE;
4401     b_oth->free_ij = PETSC_TRUE;
4402     b_oth->nonew   = 0;
4403 
4404     ierr = PetscFree(bufj);CHKERRQ(ierr);
4405     if (!startsj || !bufa_ptr){
4406       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4407       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4408     } else {
4409       *startsj  = sstartsj;
4410       *bufa_ptr = bufa;
4411     }
4412   }
4413   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4414   PetscFunctionReturn(0);
4415 }
4416 
4417 #undef __FUNCT__
4418 #define __FUNCT__ "MatGetCommunicationStructs"
4419 /*@C
4420   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4421 
4422   Not Collective
4423 
4424   Input Parameters:
4425 . A - The matrix in mpiaij format
4426 
4427   Output Parameter:
4428 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4429 . colmap - A map from global column index to local index into lvec
4430 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4431 
4432   Level: developer
4433 
4434 @*/
4435 #if defined (PETSC_USE_CTABLE)
4436 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4437 #else
4438 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4439 #endif
4440 {
4441   Mat_MPIAIJ *a;
4442 
4443   PetscFunctionBegin;
4444   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4445   PetscValidPointer(lvec, 2)
4446   PetscValidPointer(colmap, 3)
4447   PetscValidPointer(multScatter, 4)
4448   a = (Mat_MPIAIJ *) A->data;
4449   if (lvec) *lvec = a->lvec;
4450   if (colmap) *colmap = a->colmap;
4451   if (multScatter) *multScatter = a->Mvctx;
4452   PetscFunctionReturn(0);
4453 }
4454 
4455 EXTERN_C_BEGIN
4456 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4457 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4458 EXTERN_C_END
4459 
4460 /*MC
4461    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4462 
4463    Options Database Keys:
4464 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4465 
4466   Level: beginner
4467 
4468 .seealso: MatCreateMPIAIJ
4469 M*/
4470 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Constructor registered for the MATMPIAIJ type: allocates
   the Mat_MPIAIJ context, installs the function table, and composes the
   type-specific operations on the object.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  /* allocate and attach the MPIAIJ-specific data, and install the ops table */
  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->rmap.bs      = 1;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;    /* created lazily by CreateColmap_MPIAIJ_Private() */
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;


  /* compose the type-specific operations queried via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
				     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
				     "MatIsTranspose_MPIAIJ",
				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
				     "MatMPIAIJSetPreallocation_MPIAIJ",
				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
				     "MatDiagonalScaleLocal_MPIAIJ",
				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                      MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                      MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
4543 
4544 #undef __FUNCT__
4545 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4546 /*@C
4547      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4548          and "off-diagonal" part of the matrix in CSR format.
4549 
4550    Collective on MPI_Comm
4551 
4552    Input Parameters:
4553 +  comm - MPI communicator
4554 .  m - number of local rows (Cannot be PETSC_DECIDE)
4555 .  n - This value should be the same as the local size used in creating the
4556        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4557        calculated if N is given) For square matrices n is almost always m.
4558 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4559 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4560 .   i - row indices for "diagonal" portion of matrix
4561 .   j - column indices
4562 .   a - matrix values
4563 .   oi - row indices for "off-diagonal" portion of matrix
4564 .   oj - column indices
4565 -   oa - matrix values
4566 
4567    Output Parameter:
4568 .   mat - the matrix
4569 
4570    Level: advanced
4571 
4572    Notes:
4573        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4574 
4575        The i and j indices are 0 based
4576 
4577        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4578 
4579 
4580 .keywords: matrix, aij, compressed row, sparse, parallel
4581 
4582 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4583           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4584 @*/
4585 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4586 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4587 {
4588   PetscErrorCode ierr;
4589   Mat_MPIAIJ     *maij;
4590 
4591  PetscFunctionBegin;
4592   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4593   if (i[0]) {
4594     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4595   }
4596   if (oi[0]) {
4597     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4598   }
4599   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4600   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4601   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4602   maij = (Mat_MPIAIJ*) (*mat)->data;
4603   maij->donotstash     = PETSC_TRUE;
4604   (*mat)->preallocated = PETSC_TRUE;
4605 
4606   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4607   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4608   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4609 
4610   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4611   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4612 
4613   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4614   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4615   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4616   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4617 
4618   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4619   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4620   PetscFunctionReturn(0);
4621 }
4622 
4623 /*
4624     Special version for direct calls from Fortran
4625 */
4626 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4627 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4628 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4629 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4630 #endif
4631 
4632 /* Change these macros so can be used in void function */
4633 #undef CHKERRQ
4634 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4635 #undef SETERRQ2
4636 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4637 #undef SETERRQ
4638 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4639 
4640 EXTERN_C_BEGIN
4641 #undef __FUNCT__
4642 #define __FUNCT__ "matsetvaluesmpiaij_"
4643 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4644 {
4645   Mat            mat = *mmat;
4646   PetscInt       m = *mm, n = *mn;
4647   InsertMode     addv = *maddv;
4648   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4649   PetscScalar    value;
4650   PetscErrorCode ierr;
4651 
4652   MatPreallocated(mat);
4653   if (mat->insertmode == NOT_SET_VALUES) {
4654     mat->insertmode = addv;
4655   }
4656 #if defined(PETSC_USE_DEBUG)
4657   else if (mat->insertmode != addv) {
4658     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4659   }
4660 #endif
4661   {
4662   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4663   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4664   PetscTruth     roworiented = aij->roworiented;
4665 
4666   /* Some Variables required in the macro */
4667   Mat            A = aij->A;
4668   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4669   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4670   PetscScalar    *aa = a->a;
4671   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4672   Mat            B = aij->B;
4673   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4674   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4675   PetscScalar    *ba = b->a;
4676 
4677   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4678   PetscInt       nonew = a->nonew;
4679   PetscScalar    *ap1,*ap2;
4680 
4681   PetscFunctionBegin;
4682   for (i=0; i<m; i++) {
4683     if (im[i] < 0) continue;
4684 #if defined(PETSC_USE_DEBUG)
4685     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4686 #endif
4687     if (im[i] >= rstart && im[i] < rend) {
4688       row      = im[i] - rstart;
4689       lastcol1 = -1;
4690       rp1      = aj + ai[row];
4691       ap1      = aa + ai[row];
4692       rmax1    = aimax[row];
4693       nrow1    = ailen[row];
4694       low1     = 0;
4695       high1    = nrow1;
4696       lastcol2 = -1;
4697       rp2      = bj + bi[row];
4698       ap2      = ba + bi[row];
4699       rmax2    = bimax[row];
4700       nrow2    = bilen[row];
4701       low2     = 0;
4702       high2    = nrow2;
4703 
4704       for (j=0; j<n; j++) {
4705         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4706         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4707         if (in[j] >= cstart && in[j] < cend){
4708           col = in[j] - cstart;
4709           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4710         } else if (in[j] < 0) continue;
4711 #if defined(PETSC_USE_DEBUG)
4712         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4713 #endif
4714         else {
4715           if (mat->was_assembled) {
4716             if (!aij->colmap) {
4717               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4718             }
4719 #if defined (PETSC_USE_CTABLE)
4720             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4721 	    col--;
4722 #else
4723             col = aij->colmap[in[j]] - 1;
4724 #endif
4725             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4726               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4727               col =  in[j];
4728               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4729               B = aij->B;
4730               b = (Mat_SeqAIJ*)B->data;
4731               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4732               rp2      = bj + bi[row];
4733               ap2      = ba + bi[row];
4734               rmax2    = bimax[row];
4735               nrow2    = bilen[row];
4736               low2     = 0;
4737               high2    = nrow2;
4738               bm       = aij->B->rmap.n;
4739               ba = b->a;
4740             }
4741           } else col = in[j];
4742           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4743         }
4744       }
4745     } else {
4746       if (!aij->donotstash) {
4747         if (roworiented) {
4748           if (ignorezeroentries && v[i*n] == 0.0) continue;
4749           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4750         } else {
4751           if (ignorezeroentries && v[i] == 0.0) continue;
4752           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4753         }
4754       }
4755     }
4756   }}
4757   PetscFunctionReturnVoid();
4758 }
4759 EXTERN_C_END
4760 
4761