xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e5bd52469b00a8f2942d86b46a52285e84b32048)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each processor
11 has an order-N integer array) but is fast to access.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
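/*
   Illustrative sketch: garray[] maps a local off-diagonal column number to its global
   column number, and colmap is (roughly) the inverse map built above.  In the
   non-CTABLE branch,

       colmap[garray[i]] == i + 1     for i = 0, ..., B->cmap.n-1
       colmap[c]         == 0         for any global column c not present in B

   so callers recover a local column with  lcol = colmap[gcol] - 1  and treat a
   negative result as "column not present"; with PETSC_USE_CTABLE the same lookup goes
   through PetscTableFind() using the identical +1/-1 shift.
*/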
35 
36 
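/*
   The two macros below insert the value at (row,col) into the "diagonal" block A and
   the "off-diagonal" block B respectively.  In outline: binary-search the sorted
   column indices of the row down to a window of at most 5 entries, scan that window
   linearly, and either update an existing entry (adding or overwriting according to
   addv) or shift the tail of the row up one slot and insert a new entry, calling
   MatSeqXAIJReallocateAIJ() to grow the row when its preallocated space is exhausted.
*/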
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
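  /* The array v is assumed to hold the complete numerical row in global column order:
     first the off-diagonal (B) entries to the left of the diagonal block, then all
     diagonal-block (A) entries, then the remaining off-diagonal (B) entries; the three
     PetscMemcpy() calls below copy those three segments in place. */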
116 
117   /* count the number of off-diagonal entries in this row that lie to the left of the diagonal block */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
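  /* Dispatch sketch: rows owned by this process (im[i] in [rstart,rend)) are inserted
     directly; within such a row, columns in [cstart,cend) go into the diagonal block A
     and all other columns go into the off-diagonal block B, translated to local
     numbering through colmap/garray (possibly triggering DisAssemble_MPIAIJ() when a
     new off-diagonal column appears after the matrix was already assembled).  Rows
     owned by other processes are buffered in mat->stash and exchanged during
     MatAssemblyBegin/End unless MAT_IGNORE_OFF_PROC_ENTRIES was set. */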
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199             col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
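/*
   Caller-side usage sketch (hypothetical values; assumes a preallocated MATMPIAIJ
   matrix A): any process may set any entries, and entries for rows owned by another
   process are stashed here and communicated during assembly.

       PetscInt    row = 3, cols[2] = {3,7};
       PetscScalar vals[2] = {2.0,-1.0};
       ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/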
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col--;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors use the same InsertMode: either all INSERT_VALUES or all ADD_VALUES */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
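/*
   Assembly flow in brief: MatAssemblyBegin() above only checks that all processes use
   a consistent InsertMode and starts sending the stashed off-process entries;
   MatAssemblyEnd() below receives those entries, replays them through
   MatSetValues_MPIAIJ(), and then assembles the local A and B blocks.
*/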
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col,other_disassembled;
319   PetscScalar    *val;
320   InsertMode     addv = mat->insertmode;
321 
322   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled; if so we must
346      also disassemble ourselves, in order that we may reassemble. */
347   /*
348      If the nonzero structure of the submatrix B cannot change, then we know that
349      no processor disassembled, so we can skip this step.
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
359   }
360   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
361   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
364 
365   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
366   aij->rowvalues = 0;
367 
368   /* used by MatAXPY() */
369   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
371 
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_USE_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
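  /* Communication sketch: (1) determine, for each global row index passed in, which
     process owns it (using owners[] = A->rmap.range); (2) PetscMaxSum() tells every
     process how many messages to expect and the maximum message length; (3) the row
     indices are exchanged with nonblocking MPI_Isend/MPI_Irecv; (4) each process
     gathers the rows it owns into lrows[] (shifted to local numbering) and zeroes
     them in its local A and B blocks below. */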
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_USE_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_USE_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length */
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal, we use separate
502      code so that MatSetValues() is not called for each diagonal entry; doing so
503      would allocate new memory, trigger many mallocs, and slow things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
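/*
   Note: the product is computed as  y = A*x_local + B*x_ghost.  VecScatterBegin()
   starts communication of the needed ghost values of xx into a->lvec, the
   diagonal-block product A*x_local is formed while those messages are in flight, and
   after VecScatterEnd() the off-diagonal contribution B*lvec is added, overlapping
   communication with computation.  MatMultAdd() below follows the same pattern.
*/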
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added into yy until the VecScatterEnd() on the next line */
594     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
600     /* the values were actually received in the Begin(), but this no-op call is still required */
601     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 EXTERN_C_BEGIN
607 #undef __FUNCT__
608 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610 {
611   MPI_Comm       comm;
612   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614   IS             Me,Notme;
615   PetscErrorCode ierr;
616   PetscInt       M,N,first,last,*notme,i;
617   PetscMPIInt    size;
618 
619   PetscFunctionBegin;
620 
621   /* Easy test: symmetric diagonal block */
622   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
624   if (!*f) PetscFunctionReturn(0);
625   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
627   if (size == 1) PetscFunctionReturn(0);
628 
629   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
631   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
632   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
633   for (i=0; i<first; i++) notme[i] = i;
634   for (i=last; i<M; i++) notme[i-last+first] = i;
635   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
636   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
637   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
638   Aoff = Aoffs[0];
639   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
640   Boff = Boffs[0];
641   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
642   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
644   ierr = ISDestroy(Me);CHKERRQ(ierr);
645   ierr = ISDestroy(Notme);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatDestroy_MPIAIJ"
705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706 {
707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711 #if defined(PETSC_USE_LOG)
712   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713 #endif
714   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
715   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
717 #if defined (PETSC_USE_CTABLE)
718   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
719 #else
720   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
721 #endif
722   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
723   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
724   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
725   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
726   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
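  /* File layout written below (everything funneled to process 0): a 4-entry header
     (MAT_FILE_COOKIE, global rows, global columns, global nonzero count), then all row
     lengths, then all column indices in global numbering (each local row stored as
     off-diagonal columns left of the diagonal block, then the diagonal-block columns,
     then the remaining off-diagonal columns), then the matching numerical values. */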
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
903       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
904       PetscFunctionReturn(0);
905     } else if (format == PETSC_VIEWER_ASCII_INFO) {
906       PetscInt   inodecount,inodelimit,*inodes;
907       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
908       if (inodes) {
909         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
910       } else {
911         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
912       }
913       PetscFunctionReturn(0);
914     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915       PetscFunctionReturn(0);
916     }
917   } else if (isbinary) {
918     if (size == 1) {
919       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
920       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
921     } else {
922       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
923     }
924     PetscFunctionReturn(0);
925   } else if (isdraw) {
926     PetscDraw  draw;
927     PetscTruth isnull;
928     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
929     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
930   }
931 
932   if (size == 1) {
933     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
934     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
935   } else {
936     /* assemble the entire matrix onto first processor. */
937     Mat         A;
938     Mat_SeqAIJ  *Aloc;
939     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
940     PetscScalar *a;
941 
942     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
943     if (!rank) {
944       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
945     } else {
946       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
947     }
948     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
949     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
950     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
951     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
952 
953     /* copy over the A part */
954     Aloc = (Mat_SeqAIJ*)aij->A->data;
955     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
956     row = mat->rmap.rstart;
957     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
958     for (i=0; i<m; i++) {
959       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
960       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
961     }
962     aj = Aloc->j;
963     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
964 
965     /* copy over the B part */
966     Aloc = (Mat_SeqAIJ*)aij->B->data;
967     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968     row  = mat->rmap.rstart;
969     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
970     ct   = cols;
971     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
972     for (i=0; i<m; i++) {
973       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
974       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
975     }
976     ierr = PetscFree(ct);CHKERRQ(ierr);
977     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
979     /*
980        Everyone has to participate in the call that draws the matrix since the graphics waits are
981        synchronized across all processors that share the PetscDraw object
982     */
983     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
984     if (!rank) {
985       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
986       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
987     }
988     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
989     ierr = MatDestroy(A);CHKERRQ(ierr);
990   }
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatView_MPIAIJ"
996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997 {
998   PetscErrorCode ierr;
999   PetscTruth     iascii,isdraw,issocket,isbinary;
1000 
1001   PetscFunctionBegin;
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1003   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1005   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1006   if (iascii || isdraw || isbinary || issocket) {
1007     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1008   } else {
1009     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
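/*
   Sketch of the "local" SOR variants above: each outer iteration scatters the current
   ghost values of xx into mat->lvec, forms the modified right-hand side
   bb1 = bb - B*lvec, and then applies lits sweeps of sequential SOR to the diagonal
   block A alone; across processes this is a block-Jacobi iteration with SOR used
   within each process.  A fully coupled parallel SOR is not supported (hence the
   final SETERRQ).
*/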
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatPermute_MPIAIJ"
1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090 {
1091   MPI_Comm       comm,pcomm;
1092   PetscInt       first,local_size,nrows,*rows;
1093   int            ntids;
1094   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1099   /* make a collective version of 'rowp' */
1100   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1101   if (pcomm==comm) {
1102     crowp = rowp;
1103   } else {
1104     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1105     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1107     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1108   }
1109   /* collect the global row permutation and invert it */
1110   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1111   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1112   if (pcomm!=comm) {
1113     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1114   }
1115   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1116   /* get the local target indices */
1117   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1123   /* the column permutation is so much easier;
1124      make a local version of 'colp' and invert it */
1125   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1127   if (ntids==1) {
1128     lcolp = colp;
1129   } else {
1130     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1131     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1133   }
1134   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1135   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1136   if (ntids>1) {
1137     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1138     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1139   }
1140   /* now we just get the submatrix */
1141   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1142   /* clean up */
1143   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1144   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.N;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NO_NEW_NONZERO_LOCATIONS:
1207   case MAT_YES_NEW_NONZERO_LOCATIONS:
1208   case MAT_COLUMNS_UNSORTED:
1209   case MAT_COLUMNS_SORTED:
1210   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1211   case MAT_KEEP_ZEROED_ROWS:
1212   case MAT_NEW_NONZERO_LOCATION_ERR:
1213   case MAT_USE_INODES:
1214   case MAT_DO_NOT_USE_INODES:
1215   case MAT_IGNORE_ZERO_ENTRIES:
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1218     break;
1219   case MAT_ROW_ORIENTED:
1220     a->roworiented = PETSC_TRUE;
1221     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1222     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1223     break;
1224   case MAT_ROWS_SORTED:
1225   case MAT_ROWS_UNSORTED:
1226   case MAT_YES_NEW_DIAGONALS:
1227     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1228     break;
1229   case MAT_COLUMN_ORIENTED:
1230     a->roworiented = PETSC_FALSE;
1231     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1232     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1233     break;
1234   case MAT_IGNORE_OFF_PROC_ENTRIES:
1235     a->donotstash = PETSC_TRUE;
1236     break;
1237   case MAT_NO_NEW_DIAGONALS:
1238     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1239   case MAT_SYMMETRIC:
1240     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1241     break;
1242   case MAT_STRUCTURALLY_SYMMETRIC:
1243   case MAT_HERMITIAN:
1244   case MAT_SYMMETRY_ETERNAL:
1245     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1246     break;
1247   case MAT_NOT_SYMMETRIC:
1248   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1249   case MAT_NOT_HERMITIAN:
1250   case MAT_NOT_SYMMETRY_ETERNAL:
1251     break;
1252   default:
1253     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatGetRow_MPIAIJ"
1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1263   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264   PetscErrorCode ierr;
1265   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1266   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1267   PetscInt       *cmap,*idx_p;
1268 
1269   PetscFunctionBegin;
1270   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271   mat->getrowactive = PETSC_TRUE;
1272 
1273   if (!mat->rowvalues && (idx || v)) {
1274     /*
1275         allocate enough space to hold information from the longest row.
1276     */
1277     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1278     PetscInt     max = 1,tmp;
1279     for (i=0; i<matin->rmap.n; i++) {
1280       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281       if (max < tmp) { max = tmp; }
1282     }
1283     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1284     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1285   }
1286 
1287   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1288   lrow = row - rstart;
1289 
1290   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291   if (!v)   {pvA = 0; pvB = 0;}
1292   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295   nztot = nzA + nzB;
1296 
1297   cmap  = mat->garray;
1298   if (v  || idx) {
1299     if (nztot) {
1300       /* Sort by increasing column numbers, assuming A and B already sorted */
1301       PetscInt imark = -1;
1302       if (v) {
1303         *v = v_p = mat->rowvalues;
1304         for (i=0; i<nzB; i++) {
1305           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1306           else break;
1307         }
1308         imark = i;
1309         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311       }
1312       if (idx) {
1313         *idx = idx_p = mat->rowindices;
1314         if (imark > -1) {
1315           for (i=0; i<imark; i++) {
1316             idx_p[i] = cmap[cworkB[i]];
1317           }
1318         } else {
1319           for (i=0; i<nzB; i++) {
1320             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1321             else break;
1322           }
1323           imark = i;
1324         }
1325         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1326         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1327       }
1328     } else {
1329       if (idx) *idx = 0;
1330       if (v)   *v   = 0;
1331     }
1332   }
1333   *nz = nztot;
1334   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1335   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!aij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1348   }
1349   aij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatNorm_MPIAIJ"
1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1356 {
1357   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1358   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1359   PetscErrorCode ierr;
1360   PetscInt       i,j,cstart = mat->cmap.rstart;
1361   PetscReal      sum = 0.0;
1362   PetscScalar    *v;
1363 
1364   PetscFunctionBegin;
1365   if (aij->size == 1) {
1366     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1367   } else {
1368     if (type == NORM_FROBENIUS) {
1369       v = amat->a;
1370       for (i=0; i<amat->nz; i++) {
1371 #if defined(PETSC_USE_COMPLEX)
1372         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1373 #else
1374         sum += (*v)*(*v); v++;
1375 #endif
1376       }
1377       v = bmat->a;
1378       for (i=0; i<bmat->nz; i++) {
1379 #if defined(PETSC_USE_COMPLEX)
1380         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1381 #else
1382         sum += (*v)*(*v); v++;
1383 #endif
1384       }
1385       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1386       *norm = sqrt(*norm);
1387     } else if (type == NORM_1) { /* max column norm */
1388       PetscReal *tmp,*tmp2;
1389       PetscInt    *jj,*garray = aij->garray;
1390       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1392       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1393       *norm = 0.0;
1394       v = amat->a; jj = amat->j;
1395       for (j=0; j<amat->nz; j++) {
1396         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1397       }
1398       v = bmat->a; jj = bmat->j;
1399       for (j=0; j<bmat->nz; j++) {
1400         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1401       }
1402       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1403       for (j=0; j<mat->cmap.N; j++) {
1404         if (tmp2[j] > *norm) *norm = tmp2[j];
1405       }
1406       ierr = PetscFree(tmp);CHKERRQ(ierr);
1407       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1408     } else if (type == NORM_INFINITY) { /* max row norm */
1409       PetscReal ntemp = 0.0;
1410       for (j=0; j<aij->A->rmap.n; j++) {
1411         v = amat->a + amat->i[j];
1412         sum = 0.0;
1413         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1414           sum += PetscAbsScalar(*v); v++;
1415         }
1416         v = bmat->a + bmat->i[j];
1417         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1418           sum += PetscAbsScalar(*v); v++;
1419         }
1420         if (sum > ntemp) ntemp = sum;
1421       }
1422       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1423     } else {
1424       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1425     }
1426   }
1427   PetscFunctionReturn(0);
1428 }
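
/*
   Illustrative sketch (not part of this source): the three norms supported
   above and the reductions they use. NORM_FROBENIUS sums |a_ij|^2 locally and
   reduces with MPI_SUM, NORM_1 reduces the per-column absolute sums with
   MPI_SUM and takes the maximum, and NORM_INFINITY reduces the per-row
   absolute sums with MPI_MAX. NORM_2 is not supported and raises PETSC_ERR_SUP.

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/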
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatTranspose_MPIAIJ"
1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1433 {
1434   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1435   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1436   PetscErrorCode ierr;
1437   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1438   PetscInt       cstart=A->cmap.rstart,ncol;
1439   Mat            B;
1440   PetscScalar    *array;
1441 
1442   PetscFunctionBegin;
1443   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1444 
1445   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1446   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1447   ai = Aloc->i; aj = Aloc->j;
1448   bi = Bloc->i; bj = Bloc->j;
1449   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1450   cols = d_nnz + na + 1; /* work space to be used by B part */
1451   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1452   for (i=0; i<ai[ma]; i++){
1453     d_nnz[aj[i]] ++;
1454     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1455   }
1456 
1457   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1458   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1459   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1460   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1461 
1462   /* copy over the A part */
1463   array = Aloc->a;
1464   row = A->rmap.rstart;
1465   for (i=0; i<ma; i++) {
1466     ncol = ai[i+1]-ai[i];
1467     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1468     row++; array += ncol; aj += ncol;
1469   }
1470   aj = Aloc->j;
1471   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
1472 
1473   /* copy over the B part */
1474   array = Bloc->a;
1475   row = A->rmap.rstart;
1476   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1477   for (i=0; i<mb; i++) {
1478     ncol = bi[i+1]-bi[i];
1479     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1480     row++; array += ncol; cols += ncol;
1481   }
1482   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1483   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1484   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1485   if (matout) {
1486     *matout = B;
1487   } else {
1488     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1489   }
1490   PetscFunctionReturn(0);
1491 }
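
/*
   Illustrative sketch (not part of this source): the transpose above is used
   either out of place, creating a new matrix, or in place by passing a null
   result pointer, which is only permitted for square matrices.

     Mat At;
     ierr = MatTranspose(A,&At);CHKERRQ(ierr);          creates At = A^T
     ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);   replaces A by A^T (square only)
*/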
1492 
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1495 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1496 {
1497   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1498   Mat            a = aij->A,b = aij->B;
1499   PetscErrorCode ierr;
1500   PetscInt       s1,s2,s3;
1501 
1502   PetscFunctionBegin;
1503   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1504   if (rr) {
1505     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1506     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1507     /* Overlap communication with computation. */
1508     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1509   }
1510   if (ll) {
1511     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1512     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1513     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1514   }
1515   /* scale the diagonal block */
1516   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1517 
1518   if (rr) {
1519     /* Do a scatter end and then right scale the off-diagonal block */
1520     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1521     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1522   }
1523 
1524   PetscFunctionReturn(0);
1525 }
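
/*
   Illustrative sketch (not part of this source; ll and rr are assumed to be
   vectors conforming to the row and column layouts of mat): the routine above
   computes diag(ll) * mat * diag(rr), starting the scatter of rr into the
   ghosted lvec before scaling the diagonal block so that communication
   overlaps computation. Either vector may be PETSC_NULL.

     ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,ll,PETSC_NULL);CHKERRQ(ierr);   left scaling only
*/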
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1529 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1530 {
1531   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1532   PetscErrorCode ierr;
1533 
1534   PetscFunctionBegin;
1535   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1536   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1541 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1542 {
1543   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1544   PetscErrorCode ierr;
1545 
1546   PetscFunctionBegin;
1547   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatEqual_MPIAIJ"
1553 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1554 {
1555   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1556   Mat            a,b,c,d;
1557   PetscTruth     flg;
1558   PetscErrorCode ierr;
1559 
1560   PetscFunctionBegin;
1561   a = matA->A; b = matA->B;
1562   c = matB->A; d = matB->B;
1563 
1564   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1565   if (flg) {
1566     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1567   }
1568   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatCopy_MPIAIJ"
1574 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1575 {
1576   PetscErrorCode ierr;
1577   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1578   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1579 
1580   PetscFunctionBegin;
1581   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1582   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1583     /* because of the column compression in the off-processor part of the matrix a->B,
1584        the number of columns in a->B and b->B may be different, hence we cannot call
1585        MatCopy() directly on the two parts. If need be, a copy more efficient than
1586        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1587        then copying the submatrices */
1588     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1589   } else {
1590     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1591     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1592   }
1593   PetscFunctionReturn(0);
1594 }
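
/*
   Illustrative sketch (not part of this source): the fast path above copies
   the A and B parts directly, which is only safe when both matrices share the
   nonzero pattern and the copy implementation; otherwise MatCopy_Basic()
   re-inserts the values row by row.

     Mat Acopy;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&Acopy);CHKERRQ(ierr);
     ierr = MatCopy(A,Acopy,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/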
1595 
1596 #undef __FUNCT__
1597 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1598 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1599 {
1600   PetscErrorCode ierr;
1601 
1602   PetscFunctionBegin;
1603   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 #include "petscblaslapack.h"
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatAXPY_MPIAIJ"
1610 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1611 {
1612   PetscErrorCode ierr;
1613   PetscInt       i;
1614   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1615   PetscBLASInt   bnz,one=1;
1616   Mat_SeqAIJ     *x,*y;
1617 
1618   PetscFunctionBegin;
1619   if (str == SAME_NONZERO_PATTERN) {
1620     PetscScalar alpha = a;
1621     x = (Mat_SeqAIJ *)xx->A->data;
1622     y = (Mat_SeqAIJ *)yy->A->data;
1623     bnz = (PetscBLASInt)x->nz;
1624     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1625     x = (Mat_SeqAIJ *)xx->B->data;
1626     y = (Mat_SeqAIJ *)yy->B->data;
1627     bnz = (PetscBLASInt)x->nz;
1628     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1629   } else if (str == SUBSET_NONZERO_PATTERN) {
1630     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1631 
1632     x = (Mat_SeqAIJ *)xx->B->data;
1633     y = (Mat_SeqAIJ *)yy->B->data;
1634     if (y->xtoy && y->XtoY != xx->B) {
1635       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1636       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1637     }
1638     if (!y->xtoy) { /* get xtoy */
1639       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1640       y->XtoY = xx->B;
1641       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1642     }
1643     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1644   } else {
1645     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1646   }
1647   PetscFunctionReturn(0);
1648 }
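
/*
   Illustrative sketch (not part of this source): computing Y = Y + a*X with
   the routine above. SAME_NONZERO_PATTERN uses a raw BLAS axpy on the stored
   values, SUBSET_NONZERO_PATTERN builds (and caches) an index map from X's
   nonzeros into Y's, and any other structure falls back to MatAXPY_Basic().

     PetscScalar a = 3.0;
     ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/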
1649 
1650 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatConjugate_MPIAIJ"
1654 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1655 {
1656 #if defined(PETSC_USE_COMPLEX)
1657   PetscErrorCode ierr;
1658   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1662   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1663 #else
1664   PetscFunctionBegin;
1665 #endif
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatRealPart_MPIAIJ"
1671 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1672 {
1673   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1674   PetscErrorCode ierr;
1675 
1676   PetscFunctionBegin;
1677   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1678   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1684 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1685 {
1686   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1687   PetscErrorCode ierr;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1691   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #ifdef PETSC_HAVE_PBGL
1696 
1697 #include <boost/parallel/mpi/bsp_process_group.hpp>
1698 #include <boost/graph/distributed/ilu_default_graph.hpp>
1699 #include <boost/graph/distributed/ilu_0_block.hpp>
1700 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1701 #include <boost/graph/distributed/petsc/interface.hpp>
1702 #include <boost/multi_array.hpp>
1703 #include <boost/parallel/distributed_property_map.hpp>
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1707 /*
1708   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1709 */
1710 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1711 {
1712   namespace petsc = boost::distributed::petsc;
1713 
1714   namespace graph_dist = boost::graph::distributed;
1715   using boost::graph::distributed::ilu_default::process_group_type;
1716   using boost::graph::ilu_permuted;
1717 
1718   PetscTruth      row_identity, col_identity;
1719   PetscContainer  c;
1720   PetscInt        m, n, M, N;
1721   PetscErrorCode  ierr;
1722 
1723   PetscFunctionBegin;
1724   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1725   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1726   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1727   if (!row_identity || !col_identity) {
1728     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1729   }
1730 
1731   process_group_type pg;
1732   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1733   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1734   lgraph_type&   level_graph = *lgraph_p;
1735   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1736 
1737   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1738   ilu_permuted(level_graph);
1739 
1740   /* put together the new matrix */
1741   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1742   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1743   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1744   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1745   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1746   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1747   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1748   (*fact)->factor = FACTOR_LU;
1749 
1750   ierr = PetscContainerCreate(A->comm, &c);CHKERRQ(ierr);
1751   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1752   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1753   PetscFunctionReturn(0);
1754 }
1755 
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1758 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1759 {
1760   PetscFunctionBegin;
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatSolve_MPIAIJ"
1766 /*
1767   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1768 */
1769 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1770 {
1771   namespace graph_dist = boost::graph::distributed;
1772 
1773   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1774   lgraph_type*   lgraph_p;
1775   PetscContainer c;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1780   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1781   ierr = VecCopy(b, x); CHKERRQ(ierr);
1782 
1783   PetscScalar* array_x;
1784   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1785   PetscInt sx;
1786   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1787 
1788   PetscScalar* array_b;
1789   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1790   PetscInt sb;
1791   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1792 
1793   lgraph_type&   level_graph = *lgraph_p;
1794   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1795 
1796   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1797   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1798                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1799 
1800   typedef boost::iterator_property_map<array_ref_type::iterator,
1801                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1802   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1803                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1804 
1805   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1806 
1807   PetscFunctionReturn(0);
1808 }
1809 #endif
1810 
1811 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1812   PetscInt       nzlocal,nsends,nrecvs;
1813   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
1814   PetscScalar    *sbuf_a,**rbuf_a;
1815   PetscErrorCode (*MatDestroy)(Mat);
1816 } Mat_Redundant;
1817 
1818 #undef __FUNCT__
1819 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1820 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1821 {
1822   PetscErrorCode       ierr;
1823   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1824   PetscInt             i;
1825 
1826   PetscFunctionBegin;
1827   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1828   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1829   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1830   for (i=0; i<redund->nrecvs; i++){
1831     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1832     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1833   }
1834   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1835   ierr = PetscFree(redund);CHKERRQ(ierr);
1836   PetscFunctionReturn(0);
1837 }
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatDestroy_MatRedundant"
1841 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1842 {
1843   PetscErrorCode  ierr;
1844   PetscContainer  container;
1845   Mat_Redundant   *redund=PETSC_NULL;
1846 
1847   PetscFunctionBegin;
1848   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1849   if (container) {
1850     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1851   } else {
1852     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1853   }
1854   A->ops->destroy = redund->MatDestroy;
1855   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1856   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1857   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1863 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1864 {
1865   PetscMPIInt    rank,size;
1866   MPI_Comm       comm=mat->comm;
1867   PetscErrorCode ierr;
1868   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1869   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1870   PetscInt       *rowrange=mat->rmap.range;
1871   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1872   Mat            A=aij->A,B=aij->B,C=*matredundant;
1873   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1874   PetscScalar    *sbuf_a;
1875   PetscInt       nzlocal=a->nz+b->nz;
1876   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1877   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1878   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1879   PetscScalar    *vals,*aworkA,*aworkB;
1880   PetscMPIInt    tag1,tag2,tag3,imdex;
1881   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1882                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1883   MPI_Status     recv_status,*send_status;
1884   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1885   PetscInt       **rbuf_j=PETSC_NULL;
1886   PetscScalar    **rbuf_a=PETSC_NULL;
1887   Mat_Redundant  *redund=PETSC_NULL;
1888   PetscContainer container;
1889 
1890   PetscFunctionBegin;
1891   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1892   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1893 
1894   if (reuse == MAT_REUSE_MATRIX) {
1895     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1896     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1897     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1898     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1899     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1900     if (container) {
1901       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1902     } else {
1903       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1904     }
1905     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1906 
1907     nsends    = redund->nsends;
1908     nrecvs    = redund->nrecvs;
1909     send_rank = redund->send_rank; recv_rank = send_rank + size;
1910     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1911     sbuf_j    = redund->sbuf_j;
1912     sbuf_a    = redund->sbuf_a;
1913     rbuf_j    = redund->rbuf_j;
1914     rbuf_a    = redund->rbuf_a;
1915   }
1916 
1917   if (reuse == MAT_INITIAL_MATRIX){
1918     PetscMPIInt  subrank,subsize;
1919     PetscInt     nleftover,np_subcomm;
1920     /* get the destination processors' id send_rank, nsends and nrecvs */
1921     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1922     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1923     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
1924     recv_rank = send_rank + size;
1925     np_subcomm = size/nsubcomm;
1926     nleftover  = size - nsubcomm*np_subcomm;
1927     nsends = 0; nrecvs = 0;
1928     for (i=0; i<size; i++){ /* i = rank */
1929       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1930         send_rank[nsends] = i; nsends++;
1931         recv_rank[nrecvs++] = i;
1932       }
1933     }
1934     if (rank >= size - nleftover){/* this proc is a leftover processor */
1935       i = size-nleftover-1;
1936       j = 0;
1937       while (j < nsubcomm - nleftover){
1938         send_rank[nsends++] = i;
1939         i--; j++;
1940       }
1941     }
1942 
1943     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1944       for (i=0; i<nleftover; i++){
1945         recv_rank[nrecvs++] = size-nleftover+i;
1946       }
1947     }
1948 
1949     /* allocate sbuf_j, sbuf_a */
1950     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1951     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1952     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1953   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1954 
1955   /* copy mat's local entries into the buffers */
1956   if (reuse == MAT_INITIAL_MATRIX){
1957     rownz_max = 0;
1958     rptr = sbuf_j;
1959     cols = sbuf_j + rend-rstart + 1;
1960     vals = sbuf_a;
1961     rptr[0] = 0;
1962     for (i=0; i<rend-rstart; i++){
1963       row = i + rstart;
1964       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1965       ncols  = nzA + nzB;
1966       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1967       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1968       /* load the column indices for this row into cols */
1969       lwrite = 0;
1970       for (l=0; l<nzB; l++) {
1971         if ((ctmp = bmap[cworkB[l]]) < cstart){
1972           vals[lwrite]   = aworkB[l];
1973           cols[lwrite++] = ctmp;
1974         }
1975       }
1976       for (l=0; l<nzA; l++){
1977         vals[lwrite]   = aworkA[l];
1978         cols[lwrite++] = cstart + cworkA[l];
1979       }
1980       for (l=0; l<nzB; l++) {
1981         if ((ctmp = bmap[cworkB[l]]) >= cend){
1982           vals[lwrite]   = aworkB[l];
1983           cols[lwrite++] = ctmp;
1984         }
1985       }
1986       vals += ncols;
1987       cols += ncols;
1988       rptr[i+1] = rptr[i] + ncols;
1989       if (rownz_max < ncols) rownz_max = ncols;
1990     }
1991     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1992   } else { /* only copy matrix values into sbuf_a */
1993     rptr = sbuf_j;
1994     vals = sbuf_a;
1995     rptr[0] = 0;
1996     for (i=0; i<rend-rstart; i++){
1997       row = i + rstart;
1998       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1999       ncols  = nzA + nzB;
2000       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2001       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2002       lwrite = 0;
2003       for (l=0; l<nzB; l++) {
2004         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2005       }
2006       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2007       for (l=0; l<nzB; l++) {
2008         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2009       }
2010       vals += ncols;
2011       rptr[i+1] = rptr[i] + ncols;
2012     }
2013   } /* end of if (reuse == MAT_INITIAL_MATRIX) ... else */
2014 
2015   /* send nzlocal to others, and recv other's nzlocal */
2016   /*--------------------------------------------------*/
2017   if (reuse == MAT_INITIAL_MATRIX){
2018     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2019     s_waits2 = s_waits3 + nsends;
2020     s_waits1 = s_waits2 + nsends;
2021     r_waits1 = s_waits1 + nsends;
2022     r_waits2 = r_waits1 + nrecvs;
2023     r_waits3 = r_waits2 + nrecvs;
2024   } else {
2025     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2026     r_waits3 = s_waits3 + nsends;
2027   }
2028 
2029   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2030   if (reuse == MAT_INITIAL_MATRIX){
2031     /* get new tags to keep the communication clean */
2032     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2033     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2034     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2035     rbuf_nz = sbuf_nz + nsends;
2036 
2037     /* post receives of other's nzlocal */
2038     for (i=0; i<nrecvs; i++){
2039       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2040     }
2041     /* send nzlocal to others */
2042     for (i=0; i<nsends; i++){
2043       sbuf_nz[i] = nzlocal;
2044       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2045     }
2046     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2047     count = nrecvs;
2048     while (count) {
2049       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2050       recv_rank[imdex] = recv_status.MPI_SOURCE;
2051       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2052       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2053 
2054       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2055       rbuf_nz[imdex] += i + 2;
2056       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2057       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2058       count--;
2059     }
2060     /* wait on sends of nzlocal */
2061     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2062     /* send mat->i,j to others, and recv from other's */
2063     /*------------------------------------------------*/
2064     for (i=0; i<nsends; i++){
2065       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2066       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2067     }
2068     /* wait on receives of mat->i,j */
2069     /*------------------------------*/
2070     count = nrecvs;
2071     while (count) {
2072       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2073       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2074       count--;
2075     }
2076     /* wait on sends of mat->i,j */
2077     /*---------------------------*/
2078     if (nsends) {
2079       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2080     }
2081   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2082 
2083   /* post receives, send and receive mat->a */
2084   /*----------------------------------------*/
2085   for (imdex=0; imdex<nrecvs; imdex++) {
2086     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2087   }
2088   for (i=0; i<nsends; i++){
2089     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2090   }
2091   count = nrecvs;
2092   while (count) {
2093     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2094     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2095     count--;
2096   }
2097   if (nsends) {
2098     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2099   }
2100 
2101   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2102 
2103   /* create redundant matrix */
2104   /*-------------------------*/
2105   if (reuse == MAT_INITIAL_MATRIX){
2106     /* compute rownz_max for preallocation */
2107     for (imdex=0; imdex<nrecvs; imdex++){
2108       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2109       rptr = rbuf_j[imdex];
2110       for (i=0; i<j; i++){
2111         ncols = rptr[i+1] - rptr[i];
2112         if (rownz_max < ncols) rownz_max = ncols;
2113       }
2114     }
2115 
2116     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2117     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2118     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2119     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2120     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2121   } else {
2122     C = *matredundant;
2123   }
2124 
2125   /* insert local matrix entries */
2126   rptr = sbuf_j;
2127   cols = sbuf_j + rend-rstart + 1;
2128   vals = sbuf_a;
2129   for (i=0; i<rend-rstart; i++){
2130     row   = i + rstart;
2131     ncols = rptr[i+1] - rptr[i];
2132     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2133     vals += ncols;
2134     cols += ncols;
2135   }
2136   /* insert received matrix entries */
2137   for (imdex=0; imdex<nrecvs; imdex++){
2138     rstart = rowrange[recv_rank[imdex]];
2139     rend   = rowrange[recv_rank[imdex]+1];
2140     rptr = rbuf_j[imdex];
2141     cols = rbuf_j[imdex] + rend-rstart + 1;
2142     vals = rbuf_a[imdex];
2143     for (i=0; i<rend-rstart; i++){
2144       row   = i + rstart;
2145       ncols = rptr[i+1] - rptr[i];
2146       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2147       vals += ncols;
2148       cols += ncols;
2149     }
2150   }
2151   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2152   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2153   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2154   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2155   if (reuse == MAT_INITIAL_MATRIX){
2156     PetscContainer container;
2157     *matredundant = C;
2158     /* create a supporting struct and attach it to C for reuse */
2159     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
2160     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2161     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2162     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2163     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2164 
2165     redund->nzlocal = nzlocal;
2166     redund->nsends  = nsends;
2167     redund->nrecvs  = nrecvs;
2168     redund->send_rank = send_rank;
2169     redund->sbuf_nz = sbuf_nz;
2170     redund->sbuf_j  = sbuf_j;
2171     redund->sbuf_a  = sbuf_a;
2172     redund->rbuf_j  = rbuf_j;
2173     redund->rbuf_a  = rbuf_a;
2174 
2175     redund->MatDestroy = C->ops->destroy;
2176     C->ops->destroy    = MatDestroy_MatRedundant;
2177   }
2178   PetscFunctionReturn(0);
2179 }
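
/*
   Illustrative sketch (not part of this source; subcomm, nsubcomm and
   mlocal_sub are assumed to describe a previously created sub-communicator
   layout): gathering the entire parallel matrix onto each sub-communicator
   with the routine above. The Mat_Redundant struct attached to the result
   lets the communication pattern be reused when only the values change.

     Mat C;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/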
2180 
2181 /* -------------------------------------------------------------------*/
2182 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2183        MatGetRow_MPIAIJ,
2184        MatRestoreRow_MPIAIJ,
2185        MatMult_MPIAIJ,
2186 /* 4*/ MatMultAdd_MPIAIJ,
2187        MatMultTranspose_MPIAIJ,
2188        MatMultTransposeAdd_MPIAIJ,
2189 #ifdef PETSC_HAVE_PBGL
2190        MatSolve_MPIAIJ,
2191 #else
2192        0,
2193 #endif
2194        0,
2195        0,
2196 /*10*/ 0,
2197        0,
2198        0,
2199        MatRelax_MPIAIJ,
2200        MatTranspose_MPIAIJ,
2201 /*15*/ MatGetInfo_MPIAIJ,
2202        MatEqual_MPIAIJ,
2203        MatGetDiagonal_MPIAIJ,
2204        MatDiagonalScale_MPIAIJ,
2205        MatNorm_MPIAIJ,
2206 /*20*/ MatAssemblyBegin_MPIAIJ,
2207        MatAssemblyEnd_MPIAIJ,
2208        0,
2209        MatSetOption_MPIAIJ,
2210        MatZeroEntries_MPIAIJ,
2211 /*25*/ MatZeroRows_MPIAIJ,
2212        0,
2213 #ifdef PETSC_HAVE_PBGL
2214        MatLUFactorNumeric_MPIAIJ,
2215 #else
2216        0,
2217 #endif
2218        0,
2219        0,
2220 /*30*/ MatSetUpPreallocation_MPIAIJ,
2221 #ifdef PETSC_HAVE_PBGL
2222        MatILUFactorSymbolic_MPIAIJ,
2223 #else
2224        0,
2225 #endif
2226        0,
2227        0,
2228        0,
2229 /*35*/ MatDuplicate_MPIAIJ,
2230        0,
2231        0,
2232        0,
2233        0,
2234 /*40*/ MatAXPY_MPIAIJ,
2235        MatGetSubMatrices_MPIAIJ,
2236        MatIncreaseOverlap_MPIAIJ,
2237        MatGetValues_MPIAIJ,
2238        MatCopy_MPIAIJ,
2239 /*45*/ 0,
2240        MatScale_MPIAIJ,
2241        0,
2242        0,
2243        0,
2244 /*50*/ MatSetBlockSize_MPIAIJ,
2245        0,
2246        0,
2247        0,
2248        0,
2249 /*55*/ MatFDColoringCreate_MPIAIJ,
2250        0,
2251        MatSetUnfactored_MPIAIJ,
2252        MatPermute_MPIAIJ,
2253        0,
2254 /*60*/ MatGetSubMatrix_MPIAIJ,
2255        MatDestroy_MPIAIJ,
2256        MatView_MPIAIJ,
2257        0,
2258        0,
2259 /*65*/ 0,
2260        0,
2261        0,
2262        0,
2263        0,
2264 /*70*/ 0,
2265        0,
2266        MatSetColoring_MPIAIJ,
2267 #if defined(PETSC_HAVE_ADIC)
2268        MatSetValuesAdic_MPIAIJ,
2269 #else
2270        0,
2271 #endif
2272        MatSetValuesAdifor_MPIAIJ,
2273 /*75*/ 0,
2274        0,
2275        0,
2276        0,
2277        0,
2278 /*80*/ 0,
2279        0,
2280        0,
2281        0,
2282 /*84*/ MatLoad_MPIAIJ,
2283        0,
2284        0,
2285        0,
2286        0,
2287        0,
2288 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2289        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2290        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2291        MatPtAP_Basic,
2292        MatPtAPSymbolic_MPIAIJ,
2293 /*95*/ MatPtAPNumeric_MPIAIJ,
2294        0,
2295        0,
2296        0,
2297        0,
2298 /*100*/0,
2299        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2300        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2301        MatConjugate_MPIAIJ,
2302        0,
2303 /*105*/MatSetValuesRow_MPIAIJ,
2304        MatRealPart_MPIAIJ,
2305        MatImaginaryPart_MPIAIJ,
2306        0,
2307        0,
2308 /*110*/0,
2309        MatGetRedundantMatrix_MPIAIJ};
2310 
2311 /* ----------------------------------------------------------------------------------------*/
2312 
2313 EXTERN_C_BEGIN
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2316 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2317 {
2318   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2319   PetscErrorCode ierr;
2320 
2321   PetscFunctionBegin;
2322   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2323   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2324   PetscFunctionReturn(0);
2325 }
2326 EXTERN_C_END
2327 
2328 EXTERN_C_BEGIN
2329 #undef __FUNCT__
2330 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2331 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2332 {
2333   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2334   PetscErrorCode ierr;
2335 
2336   PetscFunctionBegin;
2337   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2338   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 EXTERN_C_END
2342 
2343 #include "petscpc.h"
2344 EXTERN_C_BEGIN
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2347 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2348 {
2349   Mat_MPIAIJ     *b;
2350   PetscErrorCode ierr;
2351   PetscInt       i;
2352 
2353   PetscFunctionBegin;
2354   B->preallocated = PETSC_TRUE;
2355   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2356   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2357   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2358   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2359 
2360   B->rmap.bs = B->cmap.bs = 1;
2361   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2362   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2363   if (d_nnz) {
2364     for (i=0; i<B->rmap.n; i++) {
2365       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2366     }
2367   }
2368   if (o_nnz) {
2369     for (i=0; i<B->rmap.n; i++) {
2370       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2371     }
2372   }
2373   b = (Mat_MPIAIJ*)B->data;
2374 
2375   /* Explicitly create 2 MATSEQAIJ matrices. */
2376   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2377   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2378   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2379   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2380   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2381   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2382   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2383   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2384 
2385   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2386   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2387 
2388   PetscFunctionReturn(0);
2389 }
2390 EXTERN_C_END
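
/*
   Illustrative sketch (not part of this source; the sizes are hypothetical):
   preallocating the diagonal (A) and off-diagonal (B) blocks created above.
   d_nz/d_nnz describe nonzeros in columns owned by this process, o_nz/o_nnz
   the nonzeros in all other columns.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/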
2391 
2392 #undef __FUNCT__
2393 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2394 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2395 {
2396   Mat            mat;
2397   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2398   PetscErrorCode ierr;
2399 
2400   PetscFunctionBegin;
2401   *newmat       = 0;
2402   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2403   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2404   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2405   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2406   a    = (Mat_MPIAIJ*)mat->data;
2407 
2408   mat->factor       = matin->factor;
2409   mat->rmap.bs      = matin->rmap.bs;
2410   mat->assembled    = PETSC_TRUE;
2411   mat->insertmode   = NOT_SET_VALUES;
2412   mat->preallocated = PETSC_TRUE;
2413 
2414   a->size           = oldmat->size;
2415   a->rank           = oldmat->rank;
2416   a->donotstash     = oldmat->donotstash;
2417   a->roworiented    = oldmat->roworiented;
2418   a->rowindices     = 0;
2419   a->rowvalues      = 0;
2420   a->getrowactive   = PETSC_FALSE;
2421 
2422   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2423   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2424 
2425   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2426   if (oldmat->colmap) {
2427 #if defined (PETSC_USE_CTABLE)
2428     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2429 #else
2430     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2431     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2432     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2433 #endif
2434   } else a->colmap = 0;
2435   if (oldmat->garray) {
2436     PetscInt len;
2437     len  = oldmat->B->cmap.n;
2438     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2439     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2440     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2441   } else a->garray = 0;
2442 
2443   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2444   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2445   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2446   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2447   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2448   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2449   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2450   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2451   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2452   *newmat = mat;
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #include "petscsys.h"
2457 
2458 #undef __FUNCT__
2459 #define __FUNCT__ "MatLoad_MPIAIJ"
2460 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2461 {
2462   Mat            A;
2463   PetscScalar    *vals,*svals;
2464   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2465   MPI_Status     status;
2466   PetscErrorCode ierr;
2467   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2468   PetscInt       i,nz,j,rstart,rend,mmax;
2469   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2470   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2471   PetscInt       cend,cstart,n,*rowners;
2472   int            fd;
2473 
2474   PetscFunctionBegin;
2475   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2476   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2477   if (!rank) {
2478     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2479     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2480     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2481   }
2482 
2483   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2484   M = header[1]; N = header[2];
2485   /* determine ownership of all rows */
2486   m    = M/size + ((M % size) > rank);
2487   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2488   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2489 
2490   /* First process needs enough room for process with most rows */
2491   if (!rank) {
2492     mmax       = rowners[1];
2493     for (i=2; i<size; i++) {
2494       mmax = PetscMax(mmax,rowners[i]);
2495     }
2496   } else mmax = m;
2497 
2498   rowners[0] = 0;
2499   for (i=2; i<=size; i++) {
2500     rowners[i] += rowners[i-1];
2501   }
2502   rstart = rowners[rank];
2503   rend   = rowners[rank+1];
2504 
2505   /* distribute row lengths to all processors */
2506   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2507   if (!rank) {
2508     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2509     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2510     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2511     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2512     for (j=0; j<m; j++) {
2513       procsnz[0] += ourlens[j];
2514     }
2515     for (i=1; i<size; i++) {
2516       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2517       /* calculate the number of nonzeros on each processor */
2518       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2519         procsnz[i] += rowlengths[j];
2520       }
2521       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2522     }
2523     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2524   } else {
2525     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2526   }
2527 
2528   if (!rank) {
2529     /* determine max buffer needed and allocate it */
2530     maxnz = 0;
2531     for (i=0; i<size; i++) {
2532       maxnz = PetscMax(maxnz,procsnz[i]);
2533     }
2534     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2535 
2536     /* read in my part of the matrix column indices  */
2537     nz   = procsnz[0];
2538     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2539     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2540 
2541     /* read in everyone else's and ship it off */
2542     for (i=1; i<size; i++) {
2543       nz   = procsnz[i];
2544       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2545       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2546     }
2547     ierr = PetscFree(cols);CHKERRQ(ierr);
2548   } else {
2549     /* determine buffer space needed for message */
2550     nz = 0;
2551     for (i=0; i<m; i++) {
2552       nz += ourlens[i];
2553     }
2554     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2555 
2556     /* receive message of column indices */
2557     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2558     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2559     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2560   }
2561 
2562   /* determine column ownership if matrix is not square */
2563   if (N != M) {
2564     n      = N/size + ((N % size) > rank);
2565     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2566     cstart = cend - n;
2567   } else {
2568     cstart = rstart;
2569     cend   = rend;
2570     n      = cend - cstart;
2571   }
2572 
2573   /* loop over local rows, determining number of off diagonal entries */
2574   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2575   jj = 0;
2576   for (i=0; i<m; i++) {
2577     for (j=0; j<ourlens[i]; j++) {
2578       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2579       jj++;
2580     }
2581   }
2582 
2583   /* create our matrix */
2584   for (i=0; i<m; i++) {
2585     ourlens[i] -= offlens[i];
2586   }
2587   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2588   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2589   ierr = MatSetType(A,type);CHKERRQ(ierr);
2590   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2591 
2592   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2593   for (i=0; i<m; i++) {
2594     ourlens[i] += offlens[i];
2595   }
2596 
2597   if (!rank) {
2598     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2599 
2600     /* read in my part of the matrix numerical values  */
2601     nz   = procsnz[0];
2602     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2603 
2604     /* insert into matrix */
2605     jj      = rstart;
2606     smycols = mycols;
2607     svals   = vals;
2608     for (i=0; i<m; i++) {
2609       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2610       smycols += ourlens[i];
2611       svals   += ourlens[i];
2612       jj++;
2613     }
2614 
2615     /* read in other processors and ship out */
2616     for (i=1; i<size; i++) {
2617       nz   = procsnz[i];
2618       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2619       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2620     }
2621     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2622   } else {
2623     /* receive numeric values */
2624     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2625 
2626     /* receive message of values */
2627     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2628     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2629     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2630 
2631     /* insert into matrix */
2632     jj      = rstart;
2633     smycols = mycols;
2634     svals   = vals;
2635     for (i=0; i<m; i++) {
2636       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2637       smycols += ourlens[i];
2638       svals   += ourlens[i];
2639       jj++;
2640     }
2641   }
2642   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2643   ierr = PetscFree(vals);CHKERRQ(ierr);
2644   ierr = PetscFree(mycols);CHKERRQ(ierr);
2645   ierr = PetscFree(rowners);CHKERRQ(ierr);
2646 
2647   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2648   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2649   *newmat = A;
2650   PetscFunctionReturn(0);
2651 }
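
/*
   Illustrative sketch (not part of this source; the file name is hypothetical):
   loading a matrix through the routine above. The binary file begins with the
   header [MAT_FILE_COOKIE, M, N, nz] followed by the row lengths, column
   indices, and values; rank 0 reads the file and ships each process its piece.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/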
2652 
2653 #undef __FUNCT__
2654 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2655 /*
2656     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
2657   matrix on each process, and then the end result obtained by concatenating the local
2658   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2659 */
2660 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2661 {
2662   PetscErrorCode ierr;
2663   PetscMPIInt    rank,size;
2664   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2665   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2666   Mat            *local,M,Mreuse;
2667   PetscScalar    *vwork,*aa;
2668   MPI_Comm       comm = mat->comm;
2669   Mat_SeqAIJ     *aij;
2670 
2671 
2672   PetscFunctionBegin;
2673   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2674   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2675 
2676   if (call ==  MAT_REUSE_MATRIX) {
2677     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2678     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2679     local = &Mreuse;
2680     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2681   } else {
2682     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2683     Mreuse = *local;
2684     ierr   = PetscFree(local);CHKERRQ(ierr);
2685   }
2686 
2687   /*
2688       m - number of local rows
2689       n - number of columns (same on all processors)
2690       rstart - first row in new global matrix generated
2691   */
2692   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2693   if (call == MAT_INITIAL_MATRIX) {
2694     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2695     ii  = aij->i;
2696     jj  = aij->j;
2697 
2698     /*
2699         Determine the number of non-zeros in the diagonal and off-diagonal
2700         portions of the matrix in order to do correct preallocation
2701     */
2702 
2703     /* first get start and end of "diagonal" columns */
2704     if (csize == PETSC_DECIDE) {
2705       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2706       if (mglobal == n) { /* square matrix */
2707         nlocal = m;
2708       } else {
2709         nlocal = n/size + ((n % size) > rank);
2710       }
2711     } else {
2712       nlocal = csize;
2713     }
2714     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2715     rstart = rend - nlocal;
2716     if (rank == size - 1 && rend != n) {
2717       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2718     }
2719 
2720     /* next, compute all the lengths */
2721     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2722     olens = dlens + m;
2723     for (i=0; i<m; i++) {
2724       jend = ii[i+1] - ii[i];
2725       olen = 0;
2726       dlen = 0;
2727       for (j=0; j<jend; j++) {
2728         if (*jj < rstart || *jj >= rend) olen++;
2729         else dlen++;
2730         jj++;
2731       }
2732       olens[i] = olen;
2733       dlens[i] = dlen;
2734     }
2735     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2736     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2737     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2738     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2739     ierr = PetscFree(dlens);CHKERRQ(ierr);
2740   } else {
2741     PetscInt ml,nl;
2742 
2743     M = *newmat;
2744     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2745     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2746     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2747     /*
2748          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2749        rather than the slower MatSetValues().
2750     */
2751     M->was_assembled = PETSC_TRUE;
2752     M->assembled     = PETSC_FALSE;
2753   }
2754   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2755   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2756   ii  = aij->i;
2757   jj  = aij->j;
2758   aa  = aij->a;
2759   for (i=0; i<m; i++) {
2760     row   = rstart + i;
2761     nz    = ii[i+1] - ii[i];
2762     cwork = jj;     jj += nz;
2763     vwork = aa;     aa += nz;
2764     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2765   }
2766 
2767   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2768   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2769   *newmat = M;
2770 
2771   /* save submatrix used in processor for next request */
2772   if (call ==  MAT_INITIAL_MATRIX) {
2773     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2774     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2775   }
2776 
2777   PetscFunctionReturn(0);
2778 }
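
/*
   Illustrative sketch (not part of this source; isrow and iscol are assumed to
   be index sets of global row and column numbers): extracting a parallel
   submatrix with the routine above. Passing PETSC_DECIDE for csize lets the
   local column size of the result be chosen automatically, and MAT_REUSE_MATRIX
   refills a previously extracted submatrix that has the same layout.

     Mat sub;
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/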
2779 
2780 EXTERN_C_BEGIN
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2784 {
2785   PetscInt       m,cstart, cend,j,nnz,i,d;
2786   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2787   const PetscInt *JJ;
2788   PetscScalar    *values;
2789   PetscErrorCode ierr;
2790 
2791   PetscFunctionBegin;
2792   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2793 
2794   B->rmap.bs = B->cmap.bs = 1;
2795   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2796   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2797   m      = B->rmap.n;
2798   cstart = B->cmap.rstart;
2799   cend   = B->cmap.rend;
2800   rstart = B->rmap.rstart;
2801 
2802   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2803   o_nnz = d_nnz + m;
2804 
2805   for (i=0; i<m; i++) {
2806     nnz     = Ii[i+1]- Ii[i];
2807     JJ      = J + Ii[i];
2808     nnz_max = PetscMax(nnz_max,nnz);
2809     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2810     for (j=0; j<nnz; j++) {
2811       if (*JJ >= cstart) break;
2812       JJ++;
2813     }
2814     d = 0;
2815     for (; j<nnz; j++) {
2816       if (*JJ++ >= cend) break;
2817       d++;
2818     }
2819     d_nnz[i] = d;
2820     o_nnz[i] = nnz - d;
2821   }
2822   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2823   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2824 
2825   if (v) values = (PetscScalar*)v;
2826   else {
2827     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2828     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2829   }
2830 
2831   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2832   for (i=0; i<m; i++) {
2833     ii   = i + rstart;
2834     nnz  = Ii[i+1]- Ii[i];
2835     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2836   }
2837   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2838   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2839   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2840 
2841   if (!v) {
2842     ierr = PetscFree(values);CHKERRQ(ierr);
2843   }
2844   PetscFunctionReturn(0);
2845 }
2846 EXTERN_C_END
2847 
2848 #undef __FUNCT__
2849 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2850 /*@
2851    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2852    (the default parallel PETSc format).
2853 
2854    Collective on MPI_Comm
2855 
2856    Input Parameters:
2857 +  B - the matrix
2858 .  i - the indices into j for the start of each local row (starts with zero)
2859 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2860 -  v - optional values in the matrix
2861 
2862    Level: developer
2863 
2864    Notes: this routine copies the values from i[], j[], and v[] into PETSc's internal
2865      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2866 
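   Example usage (a minimal sketch, not a prescription: the 4x4 matrix, the two-process
   layout, the communicator comm, and the variable names are purely illustrative):
.vb
      Mat         B;
      PetscInt    i[] = {0,2,4};               /* offsets of this process's 2 local rows */
      PetscInt    j[] = {0,1,1,2};             /* sorted global column indices (differ per process) */
      PetscScalar v[] = {1.0,2.0,3.0,4.0};
      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,2,2,4,4);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
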
2867 .keywords: matrix, aij, compressed row, sparse, parallel
2868 
2869 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2870           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2871 @*/
2872 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2873 {
2874   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2875 
2876   PetscFunctionBegin;
2877   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2878   if (f) {
2879     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2880   }
2881   PetscFunctionReturn(0);
2882 }
2883 
2884 #undef __FUNCT__
2885 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2886 /*@C
2887    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2888    (the default parallel PETSc format).  For good matrix assembly performance
2889    the user should preallocate the matrix storage by setting the parameters
2890    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2891    performance can be increased by more than a factor of 50.
2892 
2893    Collective on MPI_Comm
2894 
2895    Input Parameters:
2896 +  A - the matrix
2897 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2898            (same value is used for all local rows)
2899 .  d_nnz - array containing the number of nonzeros in the various rows of the
2900            DIAGONAL portion of the local submatrix (possibly different for each row)
2901            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2902            The size of this array is equal to the number of local rows, i.e. 'm'.
2903            You must leave room for the diagonal entry even if it is zero.
2904 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2905            submatrix (same value is used for all local rows).
2906 -  o_nnz - array containing the number of nonzeros in the various rows of the
2907            OFF-DIAGONAL portion of the local submatrix (possibly different for
2908            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2909            structure. The size of this array is equal to the number
2910            of local rows, i.e. 'm'.
2911 
2912    If the *_nnz parameter is given then the *_nz parameter is ignored
2913 
2914    The AIJ format (also called the Yale sparse matrix format or
2915    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2916    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2917 
2918    The parallel matrix is partitioned such that the first m0 rows belong to
2919    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2920    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
2921 
2922    The DIAGONAL portion of the local submatrix of a processor can be defined
2923    as the submatrix which is obtained by extracting the part corresponding
2924    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2925    first row that belongs to the processor, and r2 is the last row belonging
2926    to this processor. This is a square mxm matrix. The remaining portion
2927    of the local submatrix, of size m x (N-n), constitutes the OFF-DIAGONAL portion.
2928 
2929    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2930 
2931    Example usage:
2932 
2933    Consider the following 8x8 matrix with 34 non-zero values, that is
2934    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2935    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2936    as follows:
2937 
2938 .vb
2939             1  2  0  |  0  3  0  |  0  4
2940     Proc0   0  5  6  |  7  0  0  |  8  0
2941             9  0 10  | 11  0  0  | 12  0
2942     -------------------------------------
2943            13  0 14  | 15 16 17  |  0  0
2944     Proc1   0 18  0  | 19 20 21  |  0  0
2945             0  0  0  | 22 23  0  | 24  0
2946     -------------------------------------
2947     Proc2  25 26 27  |  0  0 28  | 29  0
2948            30  0  0  | 31 32 33  |  0 34
2949 .ve
2950 
2951    This can be represented as a collection of submatrices as:
2952 
2953 .vb
2954       A B C
2955       D E F
2956       G H I
2957 .ve
2958 
2959    Where the submatrices A,B,C are owned by proc0, D,E,F are
2960    owned by proc1, G,H,I are owned by proc2.
2961 
2962    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2963    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2964    The 'M','N' parameters are 8,8, and have the same values on all procs.
2965 
2966    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2967    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2968    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2969    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2970    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2971    matrix, and [DF] as another SeqAIJ matrix.
2972 
2973    When d_nz, o_nz parameters are specified, d_nz storage elements are
2974    allocated for every row of the local diagonal submatrix, and o_nz
2975    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2976    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2977    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2978    In this case, the values of d_nz,o_nz are:
2979 .vb
2980      proc0 : d_nz = 2, o_nz = 2
2981      proc1 : d_nz = 3, o_nz = 2
2982      proc2 : d_nz = 1, o_nz = 4
2983 .ve
2984    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2985    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2986    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2987    34 values.
2988 
2989    When d_nnz, o_nnz parameters are specified, the storage is specified
2990    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2991    In the above case the values for d_nnz,o_nnz are:
2992 .vb
2993      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2994      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2995      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2996 .ve
2997    Here the space allocated is the sum of all the above values, i.e. 34, and
2998    hence pre-allocation is perfect.
2999 
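   A minimal calling sketch for the d_nnz/o_nnz case above, as it would be issued on
   proc0 (the communicator comm and the variable names are illustrative only):
.vb
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);    /* m,n are 3,3 on proc0 */
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
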
3000    Level: intermediate
3001 
3002 .keywords: matrix, aij, compressed row, sparse, parallel
3003 
3004 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3005           MPIAIJ
3006 @*/
3007 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3008 {
3009   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3010 
3011   PetscFunctionBegin;
3012   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3013   if (f) {
3014     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3015   }
3016   PetscFunctionReturn(0);
3017 }
3018 
3019 #undef __FUNCT__
3020 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3021 /*@C
3022      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3023          in standard CSR format.
3024 
3025    Collective on MPI_Comm
3026 
3027    Input Parameters:
3028 +  comm - MPI communicator
3029 .  m - number of local rows (Cannot be PETSC_DECIDE)
3030 .  n - This value should be the same as the local size used in creating the
3031        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3032        calculated if N is given). For square matrices n is almost always m.
3033 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3034 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3035 .   i - row indices
3036 .   j - column indices
3037 -   a - matrix values
3038 
3039    Output Parameter:
3040 .   mat - the matrix
3041 
3042    Level: intermediate
3043 
3044    Notes:
3045        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3046      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3047      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3048 
3049        The i and j indices are 0 based
3050 
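   Example usage (a minimal sketch; the local CSR arrays are those of one process of
   an illustrative 4x4 matrix and would differ on the other processes):
.vb
      PetscInt    i[] = {0,2,3};               /* this process owns 2 rows */
      PetscInt    j[] = {0,3,1};               /* 0-based global column indices */
      PetscScalar a[] = {1.0,2.0,3.0};
      ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,4,i,j,a,&A);CHKERRQ(ierr);
.ve
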
3051 .keywords: matrix, aij, compressed row, sparse, parallel
3052 
3053 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3054           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3055 @*/
3056 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3057 {
3058   PetscErrorCode ierr;
3059 
3060   PetscFunctionBegin;
3061   if (i[0]) {
3062     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3063   }
3064   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3065   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
       ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);  /* set local and global sizes before the CSR preallocation uses them */
3066   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3067   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatCreateMPIAIJ"
3073 /*@C
3074    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3075    (the default parallel PETSc format).  For good matrix assembly performance
3076    the user should preallocate the matrix storage by setting the parameters
3077    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3078    performance can be increased by more than a factor of 50.
3079 
3080    Collective on MPI_Comm
3081 
3082    Input Parameters:
3083 +  comm - MPI communicator
3084 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3085            This value should be the same as the local size used in creating the
3086            y vector for the matrix-vector product y = Ax.
3087 .  n - This value should be the same as the local size used in creating the
3088        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3089        calculated if N is given). For square matrices n is almost always m.
3090 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3091 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3092 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3093            (same value is used for all local rows)
3094 .  d_nnz - array containing the number of nonzeros in the various rows of the
3095            DIAGONAL portion of the local submatrix (possibly different for each row)
3096            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3097            The size of this array is equal to the number of local rows, i.e. 'm'.
3098            You must leave room for the diagonal entry even if it is zero.
3099 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3100            submatrix (same value is used for all local rows).
3101 -  o_nnz - array containing the number of nonzeros in the various rows of the
3102            OFF-DIAGONAL portion of the local submatrix (possibly different for
3103            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3104            structure. The size of this array is equal to the number
3105            of local rows, i.e. 'm'.
3106 
3107    Output Parameter:
3108 .  A - the matrix
3109 
3110    Notes:
3111    If the *_nnz parameter is given then the *_nz parameter is ignored
3112 
3113    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3114    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3115    storage requirements for this matrix.
3116 
3117    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3118    processor then it must be used on all processors that share the object for
3119    that argument.
3120 
3121    The user MUST specify either the local or global matrix dimensions
3122    (possibly both).
3123 
3124    The parallel matrix is partitioned across processors such that the
3125    first m0 rows belong to process 0, the next m1 rows belong to
3126    process 1, the next m2 rows belong to process 2, etc., where
3127    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3128    values corresponding to an [m x N] submatrix.
3129 
3130    The columns are logically partitioned with the n0 columns belonging
3131    to 0th partition, the next n1 columns belonging to the next
3132    partition, etc., where n0,n1,n2... are the input parameter 'n'.
3133 
3134    The DIAGONAL portion of the local submatrix on any given processor
3135    is the submatrix corresponding to the m rows and n columns owned by
3136    the given processor, i.e. the diagonal matrix on process 0 is
3137    [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3138    etc. The remaining portion of the local submatrix [m x (N-n)]
3139    constitutes the OFF-DIAGONAL portion. The example below better
3140    illustrates this concept.
3141 
3142    For a square global matrix we define each processor's diagonal portion
3143    to be its local rows and the corresponding columns (a square submatrix);
3144    each processor's off-diagonal portion encompasses the remainder of the
3145    local matrix (a rectangular submatrix).
3146 
3147    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3148 
3149    When calling this routine with a single process communicator, a matrix of
3150    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3151    type of communicator, use the construction mechanism:
3152      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3153 
3154    By default, this format uses inodes (identical nodes) when possible.
3155    We search for consecutive rows with the same nonzero structure, thereby
3156    reusing matrix information to achieve increased efficiency.
3157 
3158    Options Database Keys:
3159 +  -mat_no_inode  - Do not use inodes
3160 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3161 -  -mat_aij_oneindex - Internally use indexing starting at 1
3162         rather than 0.  Note that when calling MatSetValues(),
3163         the user still MUST index entries starting at 0!
3164 
3165 
3166    Example usage:
3167 
3168    Consider the following 8x8 matrix with 34 non-zero values, that is
3169    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3170    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3171    as follows:
3172 
3173 .vb
3174             1  2  0  |  0  3  0  |  0  4
3175     Proc0   0  5  6  |  7  0  0  |  8  0
3176             9  0 10  | 11  0  0  | 12  0
3177     -------------------------------------
3178            13  0 14  | 15 16 17  |  0  0
3179     Proc1   0 18  0  | 19 20 21  |  0  0
3180             0  0  0  | 22 23  0  | 24  0
3181     -------------------------------------
3182     Proc2  25 26 27  |  0  0 28  | 29  0
3183            30  0  0  | 31 32 33  |  0 34
3184 .ve
3185 
3186    This can be represented as a collection of submatrices as:
3187 
3188 .vb
3189       A B C
3190       D E F
3191       G H I
3192 .ve
3193 
3194    Where the submatrices A,B,C are owned by proc0, D,E,F are
3195    owned by proc1, G,H,I are owned by proc2.
3196 
3197    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3198    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3199    The 'M','N' parameters are 8,8, and have the same values on all procs.
3200 
3201    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3202    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3203    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3204    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3205    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3206    matrix, and [DF] as another SeqAIJ matrix.
3207 
3208    When d_nz, o_nz parameters are specified, d_nz storage elements are
3209    allocated for every row of the local diagonal submatrix, and o_nz
3210    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3211    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3212    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3213    In this case, the values of d_nz,o_nz are:
3214 .vb
3215      proc0 : d_nz = 2, o_nz = 2
3216      proc1 : d_nz = 3, o_nz = 2
3217      proc2 : d_nz = 1, o_nz = 4
3218 .ve
3219    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3220    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3221    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3222    34 values.
3223 
3224    When d_nnz, o_nnz parameters are specified, the storage is specified
3225    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3226    In the above case the values for d_nnz,o_nnz are:
3227 .vb
3228      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3229      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3230      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3231 .ve
3232    Here the space allocated is the sum of all the above values, i.e. 34, and
3233    hence pre-allocation is perfect.
3234 
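   A minimal calling sketch for the example above, as issued on proc0 (comm and the
   variable names are illustrative; the other processes pass their own m,n and
   d_nnz/o_nnz arrays):
.vb
      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
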
3235    Level: intermediate
3236 
3237 .keywords: matrix, aij, compressed row, sparse, parallel
3238 
3239 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3240           MPIAIJ, MatCreateMPIAIJWithArrays()
3241 @*/
3242 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3243 {
3244   PetscErrorCode ierr;
3245   PetscMPIInt    size;
3246 
3247   PetscFunctionBegin;
3248   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3249   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3251   if (size > 1) {
3252     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3253     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3254   } else {
3255     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3256     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3257   }
3258   PetscFunctionReturn(0);
3259 }
3260 
3261 #undef __FUNCT__
3262 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3263 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3264 {
3265   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3266 
3267   PetscFunctionBegin;
3268   *Ad     = a->A;
3269   *Ao     = a->B;
3270   *colmap = a->garray;
3271   PetscFunctionReturn(0);
3272 }
3273 
3274 #undef __FUNCT__
3275 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3276 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3277 {
3278   PetscErrorCode ierr;
3279   PetscInt       i;
3280   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3281 
3282   PetscFunctionBegin;
3283   if (coloring->ctype == IS_COLORING_GLOBAL) {
3284     ISColoringValue *allcolors,*colors;
3285     ISColoring      ocoloring;
3286 
3287     /* set coloring for diagonal portion */
3288     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3289 
3290     /* set coloring for off-diagonal portion */
3291     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3292     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3293     for (i=0; i<a->B->cmap.n; i++) {
3294       colors[i] = allcolors[a->garray[i]];
3295     }
3296     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3297     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3298     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3299     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3300   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3301     ISColoringValue *colors;
3302     PetscInt        *larray;
3303     ISColoring      ocoloring;
3304 
3305     /* set coloring for diagonal portion */
3306     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3307     for (i=0; i<a->A->cmap.n; i++) {
3308       larray[i] = i + A->cmap.rstart;
3309     }
3310     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3311     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3312     for (i=0; i<a->A->cmap.n; i++) {
3313       colors[i] = coloring->colors[larray[i]];
3314     }
3315     ierr = PetscFree(larray);CHKERRQ(ierr);
3316     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3317     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3318     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3319 
3320     /* set coloring for off-diagonal portion */
3321     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3322     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3323     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3324     for (i=0; i<a->B->cmap.n; i++) {
3325       colors[i] = coloring->colors[larray[i]];
3326     }
3327     ierr = PetscFree(larray);CHKERRQ(ierr);
3328     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3329     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3330     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3331   } else {
3332     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3333   }
3334 
3335   PetscFunctionReturn(0);
3336 }
3337 
3338 #if defined(PETSC_HAVE_ADIC)
3339 #undef __FUNCT__
3340 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3341 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3342 {
3343   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3344   PetscErrorCode ierr;
3345 
3346   PetscFunctionBegin;
3347   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3348   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3349   PetscFunctionReturn(0);
3350 }
3351 #endif
3352 
3353 #undef __FUNCT__
3354 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3355 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3356 {
3357   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3358   PetscErrorCode ierr;
3359 
3360   PetscFunctionBegin;
3361   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3362   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3363   PetscFunctionReturn(0);
3364 }
3365 
3366 #undef __FUNCT__
3367 #define __FUNCT__ "MatMerge"
3368 /*@C
3369       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3370                  matrices from each processor
3371 
3372     Collective on MPI_Comm
3373 
3374    Input Parameters:
3375 +    comm - the communicator the parallel matrix will live on
3376 .    inmat - the input sequential matrix (one per process)
3377 .    n - number of local columns (or PETSC_DECIDE)
3378 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3379 
3380    Output Parameter:
3381 .    outmat - the parallel matrix generated
3382 
3383     Level: advanced
3384 
3385    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3386 
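   Example usage (a minimal sketch; inmat is assumed to be an assembled SeqAIJ matrix
   on each process, and is destroyed by this routine):
.vb
      ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve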
3387 @*/
3388 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3389 {
3390   PetscErrorCode ierr;
3391   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3392   PetscInt       *indx;
3393   PetscScalar    *values;
3394 
3395   PetscFunctionBegin;
3396   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3397   if (scall == MAT_INITIAL_MATRIX){
3398     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3399     if (n == PETSC_DECIDE){
3400       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3401     }
3402     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3403     rstart -= m;
3404 
3405     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3406     for (i=0;i<m;i++) {
3407       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3408       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3409       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3410     }
3411     /* This routine will ONLY return an MPIAIJ type matrix */
3412     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3413     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3414     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3415     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3416     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3417 
3418   } else if (scall == MAT_REUSE_MATRIX){
3419     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3420   } else {
3421     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3422   }
3423 
3424   for (i=0;i<m;i++) {
3425     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3426     Ii    = i + rstart;
3427     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3428     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3429   }
3430   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3431   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3432   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3433 
3434   PetscFunctionReturn(0);
3435 }
3436 
3437 #undef __FUNCT__
3438 #define __FUNCT__ "MatFileSplit"
3439 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3440 {
3441   PetscErrorCode    ierr;
3442   PetscMPIInt       rank;
3443   PetscInt          m,N,i,rstart,nnz;
3444   size_t            len;
3445   const PetscInt    *indx;
3446   PetscViewer       out;
3447   char              *name;
3448   Mat               B;
3449   const PetscScalar *values;
3450 
3451   PetscFunctionBegin;
3452   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3453   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3454   /* Should this be the type of the diagonal block of A? */
3455   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3456   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3457   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3458   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3459   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3460   for (i=0;i<m;i++) {
3461     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3462     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3463     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3464   }
3465   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3466   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3467 
3468   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3469   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3470   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3471   sprintf(name,"%s.%d",outfile,rank);
3472   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3473   ierr = PetscFree(name);CHKERRQ(ierr);
3474   ierr = MatView(B,out);CHKERRQ(ierr);
3475   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3476   ierr = MatDestroy(B);CHKERRQ(ierr);
3477   PetscFunctionReturn(0);
3478 }
3479 
3480 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3481 #undef __FUNCT__
3482 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3483 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3484 {
3485   PetscErrorCode       ierr;
3486   Mat_Merge_SeqsToMPI  *merge;
3487   PetscContainer       container;
3488 
3489   PetscFunctionBegin;
3490   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3491   if (container) {
3492     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3493     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3494     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3495     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3496     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3497     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3498     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3499     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3500     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3501     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3502     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3503     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3504 
3505     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3506     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
         ierr = PetscFree(merge);CHKERRQ(ierr);  /* merge is only valid when the container exists */
3507   }
3509 
3510   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3511   PetscFunctionReturn(0);
3512 }
3513 
3514 #include "src/mat/utils/freespace.h"
3515 #include "petscbt.h"
3516 static PetscEvent logkey_seqstompinum = 0;
3517 #undef __FUNCT__
3518 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3519 /*@C
3520       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
3521                  matrices from each processor
3522 
3523     Collective on MPI_Comm
3524 
3525    Input Parameters:
3526 +    comm - the communicator the parallel matrix will live on
3527 .    seqmat - the input sequential matrix (one per process)
3528 .    m - number of local rows (or PETSC_DECIDE)
3529 .    n - number of local columns (or PETSC_DECIDE)
3530 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3531 
3532    Output Parameter:
3533 .    mpimat - the parallel matrix generated
3534 
3535     Level: advanced
3536 
3537    Notes:
3538      The dimensions of the sequential matrix in each processor MUST be the same.
3539      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3540      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
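
   Example usage (a minimal sketch; seqmat is assumed to be an assembled SeqAIJ matrix
   of identical dimensions on every process):
.vb
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve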
3541 @*/
3542 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3543 {
3544   PetscErrorCode       ierr;
3545   MPI_Comm             comm=mpimat->comm;
3546   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3547   PetscMPIInt          size,rank,taga,*len_s;
3548   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3549   PetscInt             proc,m;
3550   PetscInt             **buf_ri,**buf_rj;
3551   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3552   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3553   MPI_Request          *s_waits,*r_waits;
3554   MPI_Status           *status;
3555   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3556   Mat_Merge_SeqsToMPI  *merge;
3557   PetscContainer       container;
3558 
3559   PetscFunctionBegin;
3560   if (!logkey_seqstompinum) {
3561     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3562   }
3563   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3564 
3565   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3566   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3567 
3568   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3569   if (container) {
3570     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3571   } else SETERRQ(PETSC_ERR_PLIB,"Matrix was not created by MatMerge_SeqsToMPISymbolic()");
3572   bi     = merge->bi;
3573   bj     = merge->bj;
3574   buf_ri = merge->buf_ri;
3575   buf_rj = merge->buf_rj;
3576 
3577   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3578   owners = merge->rowmap.range;
3579   len_s  = merge->len_s;
3580 
3581   /* send and recv matrix values */
3582   /*-----------------------------*/
3583   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3584   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3585 
3586   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3587   for (proc=0,k=0; proc<size; proc++){
3588     if (!len_s[proc]) continue;
3589     i = owners[proc];
3590     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3591     k++;
3592   }
3593 
3594   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3595   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3596   ierr = PetscFree(status);CHKERRQ(ierr);
3597 
3598   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3599   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3600 
3601   /* insert mat values of mpimat */
3602   /*----------------------------*/
3603   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3604   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3605   nextrow = buf_ri_k + merge->nrecv;
3606   nextai  = nextrow + merge->nrecv;
3607 
3608   for (k=0; k<merge->nrecv; k++){
3609     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3610     nrows = *(buf_ri_k[k]);
3611     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3612     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3613   }
3614 
3615   /* set values of ba */
3616   m = merge->rowmap.n;
3617   for (i=0; i<m; i++) {
3618     arow = owners[rank] + i;
3619     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3620     bnzi = bi[i+1] - bi[i];
3621     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3622 
3623     /* add local non-zero vals of this proc's seqmat into ba */
3624     anzi = ai[arow+1] - ai[arow];
3625     aj   = a->j + ai[arow];
3626     aa   = a->a + ai[arow];
3627     nextaj = 0;
3628     for (j=0; nextaj<anzi; j++){
3629       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3630         ba_i[j] += aa[nextaj++];
3631       }
3632     }
3633 
3634     /* add received vals into ba */
3635     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3636       /* i-th row */
3637       if (i == *nextrow[k]) {
3638         anzi = *(nextai[k]+1) - *nextai[k];
3639         aj   = buf_rj[k] + *(nextai[k]);
3640         aa   = abuf_r[k] + *(nextai[k]);
3641         nextaj = 0;
3642         for (j=0; nextaj<anzi; j++){
3643           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3644             ba_i[j] += aa[nextaj++];
3645           }
3646         }
3647         nextrow[k]++; nextai[k]++;
3648       }
3649     }
3650     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3651   }
3652   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3653   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3654 
3655   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3656   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3657   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3658   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3659   PetscFunctionReturn(0);
3660 }
3661 
3662 static PetscEvent logkey_seqstompisym = 0;
3663 #undef __FUNCT__
3664 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3665 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3666 {
3667   PetscErrorCode       ierr;
3668   Mat                  B_mpi;
3669   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3670   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3671   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3672   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3673   PetscInt             len,proc,*dnz,*onz;
3674   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3675   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3676   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3677   MPI_Status           *status;
3678   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3679   PetscBT              lnkbt;
3680   Mat_Merge_SeqsToMPI  *merge;
3681   PetscContainer       container;
3682 
3683   PetscFunctionBegin;
3684   if (!logkey_seqstompisym) {
3685     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3686   }
3687   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3688 
3689   /* make sure it is a PETSc comm */
3690   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3691   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3692   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3693 
3694   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3695   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3696 
3697   /* determine row ownership */
3698   /*---------------------------------------------------------*/
3699   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3700   merge->rowmap.n = m;
3701   merge->rowmap.N = M;
3702   merge->rowmap.bs = 1;
3703   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3704   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3705   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3706 
3707   m      = merge->rowmap.n;
3708   M      = merge->rowmap.N;
3709   owners = merge->rowmap.range;
3710 
3711   /* determine the number of messages to send, their lengths */
3712   /*---------------------------------------------------------*/
3713   len_s  = merge->len_s;
3714 
3715   len = 0;  /* length of buf_si[] */
3716   merge->nsend = 0;
3717   for (proc=0; proc<size; proc++){
3718     len_si[proc] = 0;
3719     if (proc == rank){
3720       len_s[proc] = 0;
3721     } else {
3722       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3723       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3724     }
3725     if (len_s[proc]) {
3726       merge->nsend++;
3727       nrows = 0;
3728       for (i=owners[proc]; i<owners[proc+1]; i++){
3729         if (ai[i+1] > ai[i]) nrows++;
3730       }
3731       len_si[proc] = 2*(nrows+1);
3732       len += len_si[proc];
3733     }
3734   }
3735 
3736   /* determine the number and length of messages to receive for ij-structure */
3737   /*-------------------------------------------------------------------------*/
3738   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3739   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3740 
3741   /* post the Irecv of j-structure */
3742   /*-------------------------------*/
3743   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3744   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3745 
3746   /* post the Isend of j-structure */
3747   /*--------------------------------*/
3748   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3749   sj_waits = si_waits + merge->nsend;
3750 
3751   for (proc=0, k=0; proc<size; proc++){
3752     if (!len_s[proc]) continue;
3753     i = owners[proc];
3754     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3755     k++;
3756   }
3757 
3758   /* receives and sends of j-structure are complete */
3759   /*------------------------------------------------*/
3760   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3761   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3762 
3763   /* send and recv i-structure */
3764   /*---------------------------*/
3765   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3766   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3767 
3768   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3769   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3770   for (proc=0,k=0; proc<size; proc++){
3771     if (!len_s[proc]) continue;
3772     /* form outgoing message for i-structure:
3773          buf_si[0]:                 nrows to be sent
3774                [1:nrows]:           row index (local to the receiving proc)
3775                [nrows+1:2*nrows+1]: i-structure index
3776     */
3777     /*-------------------------------------------*/
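    /* illustrative example (values not taken from the data above): sending two nonempty rows
       whose local indices on the receiving proc are 0 and 2, with 3 and 2 nonzeros respectively,
       gives buf_si = {2, 0,2, 0,3,5} and len_si[proc] = 6 */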
3778     nrows = len_si[proc]/2 - 1;
3779     buf_si_i    = buf_si + nrows+1;
3780     buf_si[0]   = nrows;
3781     buf_si_i[0] = 0;
3782     nrows = 0;
3783     for (i=owners[proc]; i<owners[proc+1]; i++){
3784       anzi = ai[i+1] - ai[i];
3785       if (anzi) {
3786         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3787         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3788         nrows++;
3789       }
3790     }
3791     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3792     k++;
3793     buf_si += len_si[proc];
3794   }
3795 
3796   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3797   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3798 
3799   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3800   for (i=0; i<merge->nrecv; i++){
3801     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3802   }
3803 
3804   ierr = PetscFree(len_si);CHKERRQ(ierr);
3805   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3806   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3807   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3808   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3809   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3810   ierr = PetscFree(status);CHKERRQ(ierr);
3811 
3812   /* compute a local seq matrix in each processor */
3813   /*----------------------------------------------*/
3814   /* allocate bi array and free space for accumulating nonzero column info */
3815   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3816   bi[0] = 0;
3817 
3818   /* create and initialize a linked list */
3819   nlnk = N+1;
3820   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3821 
3822   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3823   len = 0;
3824   len  = ai[owners[rank+1]] - ai[owners[rank]];
3825   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3826   current_space = free_space;
3827 
3828   /* determine symbolic info for each local row */
3829   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3830   nextrow = buf_ri_k + merge->nrecv;
3831   nextai  = nextrow + merge->nrecv;
3832   for (k=0; k<merge->nrecv; k++){
3833     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3834     nrows = *buf_ri_k[k];
3835     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3836     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3837   }
3838 
3839   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3840   len = 0;
3841   for (i=0;i<m;i++) {
3842     bnzi   = 0;
3843     /* add local non-zero cols of this proc's seqmat into lnk */
3844     arow   = owners[rank] + i;
3845     anzi   = ai[arow+1] - ai[arow];
3846     aj     = a->j + ai[arow];
3847     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3848     bnzi += nlnk;
3849     /* add received col data into lnk */
3850     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3851       if (i == *nextrow[k]) { /* i-th row */
3852         anzi = *(nextai[k]+1) - *nextai[k];
3853         aj   = buf_rj[k] + *nextai[k];
3854         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3855         bnzi += nlnk;
3856         nextrow[k]++; nextai[k]++;
3857       }
3858     }
3859     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3860 
3861     /* if free space is not available, make more free space */
3862     if (current_space->local_remaining<bnzi) {
3863       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3864       nspacedouble++;
3865     }
3866     /* copy data into free space, then initialize lnk */
3867     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3868     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3869 
3870     current_space->array           += bnzi;
3871     current_space->local_used      += bnzi;
3872     current_space->local_remaining -= bnzi;
3873 
3874     bi[i+1] = bi[i] + bnzi;
3875   }
3876 
3877   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3878 
3879   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3880   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3881   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3882 
3883   /* create symbolic parallel matrix B_mpi */
3884   /*---------------------------------------*/
3885   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3886   if (n==PETSC_DECIDE) {
3887     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3888   } else {
3889     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3890   }
3891   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3892   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3893   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3894 
3895   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3896   B_mpi->assembled     = PETSC_FALSE;
3897   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3898   merge->bi            = bi;
3899   merge->bj            = bj;
3900   merge->buf_ri        = buf_ri;
3901   merge->buf_rj        = buf_rj;
3902   merge->coi           = PETSC_NULL;
3903   merge->coj           = PETSC_NULL;
3904   merge->owners_co     = PETSC_NULL;
3905 
3906   /* attach the supporting struct to B_mpi for reuse */
3907   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3908   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3909   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3910   *mpimat = B_mpi;
3911 
3912   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3913   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3914   PetscFunctionReturn(0);
3915 }
3916 
3917 static PetscEvent logkey_seqstompi = 0;
3918 #undef __FUNCT__
3919 #define __FUNCT__ "MatMerge_SeqsToMPI"
3920 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3921 {
3922   PetscErrorCode   ierr;
3923 
3924   PetscFunctionBegin;
3925   if (!logkey_seqstompi) {
3926     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3927   }
3928   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3929   if (scall == MAT_INITIAL_MATRIX){
3930     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3931   }
3932   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3933   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3934   PetscFunctionReturn(0);
3935 }
3936 static PetscEvent logkey_getlocalmat = 0;
3937 #undef __FUNCT__
3938 #define __FUNCT__ "MatGetLocalMat"
3939 /*@C
3940      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of the parallel matrix A
3941 
3942     Not Collective
3943 
3944    Input Parameters:
3945 +    A - the matrix
3946 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3947 
3948    Output Parameter:
3949 .    A_loc - the local sequential matrix generated
3950 
3951     Level: developer
3952 
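   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix):
.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      /* ... after the values of A change ... */
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve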
3953 @*/
3954 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3955 {
3956   PetscErrorCode  ierr;
3957   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3958   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3959   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3960   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3961   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3962   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3963 
3964   PetscFunctionBegin;
3965   if (!logkey_getlocalmat) {
3966     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3967   }
3968   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3969   if (scall == MAT_INITIAL_MATRIX){
3970     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3971     ci[0] = 0;
3972     for (i=0; i<am; i++){
3973       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3974     }
3975     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3976     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3977     k = 0;
3978     for (i=0; i<am; i++) {
3979       ncols_o = bi[i+1] - bi[i];
3980       ncols_d = ai[i+1] - ai[i];
3981       /* off-diagonal portion of A */
3982       for (jo=0; jo<ncols_o; jo++) {
3983         col = cmap[*bj];
3984         if (col >= cstart) break;
3985         cj[k]   = col; bj++;
3986         ca[k++] = *ba++;
3987       }
3988       /* diagonal portion of A */
3989       for (j=0; j<ncols_d; j++) {
3990         cj[k]   = cstart + *aj++;
3991         ca[k++] = *aa++;
3992       }
3993       /* off-diagonal portion of A */
3994       for (j=jo; j<ncols_o; j++) {
3995         cj[k]   = cmap[*bj++];
3996         ca[k++] = *ba++;
3997       }
3998     }
3999     /* put together the new matrix */
4000     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4001     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4002     /* Since these are PETSc arrays, change flags to free them as necessary. */
4003     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4004     mat->free_a  = PETSC_TRUE;
4005     mat->free_ij = PETSC_TRUE;
4006     mat->nonew   = 0;
4007   } else if (scall == MAT_REUSE_MATRIX){
4008     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4009     ci = mat->i; cj = mat->j; ca = mat->a;
4010     for (i=0; i<am; i++) {
4011       /* off-diagonal portion of A */
4012       ncols_o = bi[i+1] - bi[i];
4013       for (jo=0; jo<ncols_o; jo++) {
4014         col = cmap[*bj];
4015         if (col >= cstart) break;
4016         *ca++ = *ba++; bj++;
4017       }
4018       /* diagonal portion of A */
4019       ncols_d = ai[i+1] - ai[i];
4020       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4021       /* off-diagonal portion of A */
4022       for (j=jo; j<ncols_o; j++) {
4023         *ca++ = *ba++; bj++;
4024       }
4025     }
4026   } else {
4027     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4028   }
4029 
4030   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4031   PetscFunctionReturn(0);
4032 }
4033 
4034 static PetscEvent logkey_getlocalmatcondensed = 0;
4035 #undef __FUNCT__
4036 #define __FUNCT__ "MatGetLocalMatCondensed"
4037 /*@C
4038      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and NON-ZERO columns of the parallel matrix A
4039 
4040     Not Collective
4041 
4042    Input Parameters:
4043 +    A - the matrix
4044 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4045 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4046 
4047    Output Parameter:
4048 .    A_loc - the local sequential matrix generated
4049 
4050     Level: developer
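   Example usage (a minimal sketch; passing PETSC_NULL lets the routine build the row
   and column index sets from A's local rows and nonzero columns):
.vb
      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve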
4051 
4052 @*/
4053 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4054 {
4055   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4056   PetscErrorCode    ierr;
4057   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4058   IS                isrowa,iscola;
4059   Mat               *aloc;
4060 
4061   PetscFunctionBegin;
4062   if (!logkey_getlocalmatcondensed) {
4063     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
4064   }
4065   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4066   if (!row){
4067     start = A->rmap.rstart; end = A->rmap.rend;
4068     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4069   } else {
4070     isrowa = *row;
4071   }
4072   if (!col){
4073     start = A->cmap.rstart;
4074     cmap  = a->garray;
4075     nzA   = a->A->cmap.n;
4076     nzB   = a->B->cmap.n;
4077     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4078     ncols = 0;
4079     for (i=0; i<nzB; i++) {
4080       if (cmap[i] < start) idx[ncols++] = cmap[i];
4081       else break;
4082     }
4083     imark = i;
4084     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4085     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4086     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4087     ierr = PetscFree(idx);CHKERRQ(ierr);
4088   } else {
4089     iscola = *col;
4090   }
4091   if (scall != MAT_INITIAL_MATRIX){
4092     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4093     aloc[0] = *A_loc;
4094   }
4095   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4096   *A_loc = aloc[0];
4097   ierr = PetscFree(aloc);CHKERRQ(ierr);
4098   if (!row){
4099     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4100   }
4101   if (!col){
4102     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4103   }
4104   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4105   PetscFunctionReturn(0);
4106 }
4107 
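/*
   Example (illustrative sketch, not part of the library): typical use of
   MatGetLocalMatCondensed() with PETSC_NULL row/col index sets, so that all
   local rows and all columns containing nonzeros are taken.  A is assumed to
   be a previously assembled MPIAIJ matrix, and the reuse call assumes only the
   numerical values of A changed, not its nonzero pattern.

      Mat A_loc;

      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ... use A_loc ...
      ierr = MatGetLocalMatCondensed(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/
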
4108 static PetscEvent logkey_GetBrowsOfAcols = 0;
4109 #undef __FUNCT__
4110 #define __FUNCT__ "MatGetBrowsOfAcols"
4111 /*@C
4112     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4113 
4114     Collective on Mat
4115 
4116    Input Parameters:
4117 +    A,B - the matrices in mpiaij format
4118 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4119 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4120 
4121    Output Parameters:
4122 +    rowb, colb - index sets of rows and columns of B to extract
4123 .    brstart - row index of B_seq at which the next B->rmap.n rows, taken from B's local rows, begin
4124 -    B_seq - the sequential matrix generated
4125 
4126     Level: developer
4127 
4128 @*/
4129 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4130 {
4131   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4132   PetscErrorCode    ierr;
4133   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4134   IS                isrowb,iscolb;
4135   Mat               *bseq;
4136 
4137   PetscFunctionBegin;
4138   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4139     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4140   }
4141   if (!logkey_GetBrowsOfAcols) {
4142     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
4143   }
4144   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4145 
4146   if (scall == MAT_INITIAL_MATRIX){
4147     start = A->cmap.rstart;
4148     cmap  = a->garray;
4149     nzA   = a->A->cmap.n;
4150     nzB   = a->B->cmap.n;
4151     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4152     ncols = 0;
4153     for (i=0; i<nzB; i++) {  /* row < local row index */
4154       if (cmap[i] < start) idx[ncols++] = cmap[i];
4155       else break;
4156     }
4157     imark = i;
4158     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4159     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4160     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4161     ierr = PetscFree(idx);CHKERRQ(ierr);
4162     *brstart = imark;
4163     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4164   } else {
4165     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4166     isrowb = *rowb; iscolb = *colb;
4167     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4168     bseq[0] = *B_seq;
4169   }
4170   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4171   *B_seq = bseq[0];
4172   ierr = PetscFree(bseq);CHKERRQ(ierr);
4173   if (!rowb){
4174     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4175   } else {
4176     *rowb = isrowb;
4177   }
4178   if (!colb){
4179     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4180   } else {
4181     *colb = iscolb;
4182   }
4183   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4184   PetscFunctionReturn(0);
4185 }
4186 
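/*
   Example (illustrative sketch, not part of the library): passing non-null
   rowb/colb so the index sets created on the MAT_INITIAL_MATRIX call can be
   reused on a later MAT_REUSE_MATRIX call.  A and B are assumed to be
   previously assembled MPIAIJ matrices; freeing the index sets and B_seq is
   assumed to be the caller's job.

      IS       rowb = PETSC_NULL,colb = PETSC_NULL;
      PetscInt brstart;
      Mat      B_seq;

      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ... use B_seq ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/
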
4187 static PetscEvent logkey_GetBrowsOfAocols = 0;
4188 #undef __FUNCT__
4189 #define __FUNCT__ "MatGetBrowsOfAoCols"
4190 /*@C
4191     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4192     of the OFF-DIAGONAL portion of the local part of A
4193 
4194     Collective on Mat
4195 
4196    Input Parameters:
4197 +    A,B - the matrices in mpiaij format
4198 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4199 .    startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4200 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4201 
4202    Output Parameter:
4203 .    B_oth - the sequential matrix generated
4204 
4205     Level: developer
4206 
4207 @*/
4208 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4209 {
4210   VecScatter_MPI_General *gen_to,*gen_from;
4211   PetscErrorCode         ierr;
4212   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4213   Mat_SeqAIJ             *b_oth;
4214   VecScatter             ctx=a->Mvctx;
4215   MPI_Comm               comm=ctx->comm;
4216   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4217   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4218   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4219   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4220   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4221   MPI_Status             *sstatus,rstatus;
4222   PetscInt               *cols,sbs,rbs;
4223   PetscScalar            *vals;
4224 
4225   PetscFunctionBegin;
4226   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4227     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4228   }
4229   if (!logkey_GetBrowsOfAocols) {
4230     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
4231   }
4232   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4233   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4234 
4235   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4236   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4237   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
4238   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
4239   nrecvs   = gen_from->n;
4240   nsends   = gen_to->n;
4241 
4242   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4243   srow     = gen_to->indices;   /* local row index to be sent */
4244   sstarts  = gen_to->starts;
4245   sprocs   = gen_to->procs;
4246   sstatus  = gen_to->sstatus;
4247   sbs      = gen_to->bs;
4248   rstarts  = gen_from->starts;
4249   rprocs   = gen_from->procs;
4250   rbs      = gen_from->bs;
4251 
4252   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4253   if (scall == MAT_INITIAL_MATRIX){
4254     /* i-array */
4255     /*---------*/
4256     /*  post receives */
4257     for (i=0; i<nrecvs; i++){
4258       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4259       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4260       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4261     }
4262 
4263     /* pack the outgoing message */
4264     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4265     rstartsj = sstartsj + nsends +1;
4266     sstartsj[0] = 0;  rstartsj[0] = 0;
4267     len = 0; /* total length of j or a array to be sent */
4268     k = 0;
4269     for (i=0; i<nsends; i++){
4270       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4271       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4272       for (j=0; j<nrows; j++) {
4273         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4274         for (l=0; l<sbs; l++){
4275           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4276           rowlen[j*sbs+l] = ncols;
4277           len += ncols;
4278           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4279         }
4280         k++;
4281       }
4282       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4283       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4284     }
4285     /* recvs and sends of i-array are completed */
4286     i = nrecvs;
4287     while (i--) {
4288       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4289     }
4290     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4291 
4292     /* allocate buffers for sending j and a arrays */
4293     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4294     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4295 
4296     /* create i-array of B_oth */
4297     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4298     b_othi[0] = 0;
4299     len = 0; /* total length of j or a array to be received */
4300     k = 0;
4301     for (i=0; i<nrecvs; i++){
4302       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4303       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4304       for (j=0; j<nrows; j++) {
4305         b_othi[k+1] = b_othi[k] + rowlen[j];
4306         len += rowlen[j]; k++;
4307       }
4308       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4309     }
4310 
4311     /* allocate space for j and a arrays of B_oth */
4312     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4313     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4314 
4315     /* j-array */
4316     /*---------*/
4317     /*  post receives of j-array */
4318     for (i=0; i<nrecvs; i++){
4319       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4320       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4321     }
4322 
4323     /* pack the outgoing message j-array */
4324     k = 0;
4325     for (i=0; i<nsends; i++){
4326       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4327       bufJ = bufj+sstartsj[i];
4328       for (j=0; j<nrows; j++) {
4329         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4330         for (ll=0; ll<sbs; ll++){
4331           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4332           for (l=0; l<ncols; l++){
4333             *bufJ++ = cols[l];
4334           }
4335           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4336         }
4337       }
4338       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4339     }
4340 
4341     /* recvs and sends of j-array are completed */
4342     i = nrecvs;
4343     while (i--) {
4344       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4345     }
4346     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4347   } else if (scall == MAT_REUSE_MATRIX){
4348     sstartsj = *startsj;
4349     rstartsj = sstartsj + nsends +1;
4350     bufa     = *bufa_ptr;
4351     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4352     b_otha   = b_oth->a;
4353   } else {
4354     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4355   }
4356 
4357   /* a-array */
4358   /*---------*/
4359   /*  post receives of a-array */
4360   for (i=0; i<nrecvs; i++){
4361     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4362     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4363   }
4364 
4365   /* pack the outgoing message a-array */
4366   k = 0;
4367   for (i=0; i<nsends; i++){
4368     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4369     bufA = bufa+sstartsj[i];
4370     for (j=0; j<nrows; j++) {
4371       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4372       for (ll=0; ll<sbs; ll++){
4373         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4374         for (l=0; l<ncols; l++){
4375           *bufA++ = vals[l];
4376         }
4377         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4378       }
4379     }
4380     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4381   }
4382   /* recvs and sends of a-array are completed */
4383   i = nrecvs;
4384   while (i--) {
4385     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4386   }
4387   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4388   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4389 
4390   if (scall == MAT_INITIAL_MATRIX){
4391     /* put together the new matrix */
4392     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4393 
4394     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4395     /* Since these are PETSc arrays, change flags to free them as necessary. */
4396     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4397     b_oth->free_a  = PETSC_TRUE;
4398     b_oth->free_ij = PETSC_TRUE;
4399     b_oth->nonew   = 0;
4400 
4401     ierr = PetscFree(bufj);CHKERRQ(ierr);
4402     if (!startsj || !bufa_ptr){
4403       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4404       ierr = PetscFree(bufa);CHKERRQ(ierr);
4405     } else {
4406       *startsj  = sstartsj;
4407       *bufa_ptr = bufa;
4408     }
4409   }
4410   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4411   PetscFunctionReturn(0);
4412 }
4413 
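/*
   Example (illustrative sketch, not part of the library): saving startsj and
   bufa across calls so the communication layout computed on the
   MAT_INITIAL_MATRIX call is reused when only B's numerical values change.
   A and B are assumed to be previously assembled MPIAIJ matrices; freeing
   startsj, bufa and B_oth afterwards is assumed to be the caller's job.

      PetscInt    *startsj = PETSC_NULL;
      PetscScalar *bufa    = PETSC_NULL;
      Mat         B_oth;

      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ... change the values (but not the nonzero pattern) of B ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/
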
4414 #undef __FUNCT__
4415 #define __FUNCT__ "MatGetCommunicationStructs"
4416 /*@C
4417   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4418 
4419   Not Collective
4420 
4421   Input Parameters:
4422 . A - The matrix in mpiaij format
4423 
4424   Output Parameter:
4425 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4426 . colmap - A map from global column index to local index into lvec
4427 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4428 
4429   Level: developer
4430 
4431 @*/
4432 #if defined (PETSC_USE_CTABLE)
4433 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4434 #else
4435 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4436 #endif
4437 {
4438   Mat_MPIAIJ *a;
4439 
4440   PetscFunctionBegin;
4441   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4442   PetscValidPointer(lvec, 2);
4443   PetscValidPointer(colmap, 3);
4444   PetscValidPointer(multScatter, 4);
4445   a = (Mat_MPIAIJ *) A->data;
4446   if (lvec) *lvec = a->lvec;
4447   if (colmap) *colmap = a->colmap;
4448   if (multScatter) *multScatter = a->Mvctx;
4449   PetscFunctionReturn(0);
4450 }
4451 
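/*
   Example (illustrative sketch, not part of the library): querying the
   communication structures of an assembled MPIAIJ matrix A (assumed to exist).
   The type of colmap depends on whether PETSc was configured with
   PETSC_USE_CTABLE, mirroring the two prototypes above.

      Vec        lvec;
      VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif

      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/
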
4452 EXTERN_C_BEGIN
4453 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4454 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4455 EXTERN_C_END
4456 
4457 /*MC
4458    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4459 
4460    Options Database Keys:
4461 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4462 
4463   Level: beginner
4464 
4465 .seealso: MatCreateMPIAIJ
4466 M*/
4467 
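/*
   Example (illustrative sketch, not part of the library): creating an MPIAIJ
   matrix explicitly.  The sizes and the preallocation numbers (5 diagonal and
   2 off-diagonal nonzeros per row) are placeholders chosen for illustration
   only.

      Mat      A;
      PetscInt M = 100,N = 100;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, call MatSetFromOptions(A) instead of MatSetType() and select
   the type at run time with -mat_type mpiaij.
*/
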
4468 EXTERN_C_BEGIN
4469 #undef __FUNCT__
4470 #define __FUNCT__ "MatCreate_MPIAIJ"
4471 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4472 {
4473   Mat_MPIAIJ     *b;
4474   PetscErrorCode ierr;
4475   PetscMPIInt    size;
4476 
4477   PetscFunctionBegin;
4478   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4479 
4480   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
4481   B->data         = (void*)b;
4482   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4483   B->factor       = 0;
4484   B->rmap.bs      = 1;
4485   B->assembled    = PETSC_FALSE;
4486   B->mapping      = 0;
4487 
4488   B->insertmode      = NOT_SET_VALUES;
4489   b->size            = size;
4490   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4491 
4492   /* build cache for off array entries formed */
4493   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4494   b->donotstash  = PETSC_FALSE;
4495   b->colmap      = 0;
4496   b->garray      = 0;
4497   b->roworiented = PETSC_TRUE;
4498 
4499   /* stuff used for matrix vector multiply */
4500   b->lvec      = PETSC_NULL;
4501   b->Mvctx     = PETSC_NULL;
4502 
4503   /* stuff for MatGetRow() */
4504   b->rowindices   = 0;
4505   b->rowvalues    = 0;
4506   b->getrowactive = PETSC_FALSE;
4507 
4508 
4509   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4510                                      "MatStoreValues_MPIAIJ",
4511                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4512   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4513                                      "MatRetrieveValues_MPIAIJ",
4514                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4515   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4516 				     "MatGetDiagonalBlock_MPIAIJ",
4517                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4518   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4519 				     "MatIsTranspose_MPIAIJ",
4520 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4521   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4522 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4523 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4524   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4525 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4526 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4527   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4528 				     "MatDiagonalScaleLocal_MPIAIJ",
4529 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4530   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4531                                      "MatConvert_MPIAIJ_MPICSRPERM",
4532                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4533   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4534                                      "MatConvert_MPIAIJ_MPICRL",
4535                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4536   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4537   PetscFunctionReturn(0);
4538 }
4539 EXTERN_C_END
4540 
4541 #undef __FUNCT__
4542 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4543 /*@C
4544      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
4545          and "off-diagonal" parts of the matrix in CSR format.
4546 
4547    Collective on MPI_Comm
4548 
4549    Input Parameters:
4550 +  comm - MPI communicator
4551 .  m - number of local rows (Cannot be PETSC_DECIDE)
4552 .  n - This value should be the same as the local size used in creating the
4553        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4554        it calculated if N is given). For square matrices n is almost always m.
4555 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4556 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4557 .   i - row indices for "diagonal" portion of matrix
4558 .   j - column indices (local to the "diagonal" block)
4559 .   a - matrix values
4560 .   oi - row indices for "off-diagonal" portion of matrix
4561 .   oj - column indices (global)
4562 -   oa - matrix values
4563 
4564    Output Parameter:
4565 .   mat - the matrix
4566 
4567    Level: advanced
4568 
4569    Notes:
4570        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc.
4571 
4572        The i, j, oi, and oj indices are 0 based
4573 
4574        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4575 
4576 
4577 .keywords: matrix, aij, compressed row, sparse, parallel
4578 
4579 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4580           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4581 @*/
4582 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4583 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4584 {
4585   PetscErrorCode ierr;
4586   Mat_MPIAIJ     *maij;
4587 
4588   PetscFunctionBegin;
4589   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4590   if (i[0]) {
4591     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4592   }
4593   if (oi[0]) {
4594     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4595   }
4596   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4597   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4598   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4599   maij = (Mat_MPIAIJ*) (*mat)->data;
4600   maij->donotstash     = PETSC_TRUE;
4601   (*mat)->preallocated = PETSC_TRUE;
4602 
4603   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4604   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4605   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4606 
4607   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4608   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4609 
4610   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4611   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4612   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4613   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4614 
4615   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4616   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4617   PetscFunctionReturn(0);
4618 }
4619 
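/*
   Example (illustrative sketch, not part of the library): a hypothetical
   two-process assembly of the 2x2 matrix [2 -1; -1 2] with one row per
   process.  It assumes the j indices are local to the diagonal block and the
   oj indices are global, as in the calls to MatCreateSeqAIJWithArrays() above.

      PetscInt    i[2]  = {0,1},oi[2] = {0,1};
      PetscInt    j[1]  = {0},oj[1];
      PetscScalar a[1]  = {2.0},oa[1] = {-1.0};
      PetscMPIInt rank;
      Mat         mat;

      ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      oj[0] = rank ? 0 : 1;   (the single off-diagonal entry couples to the other process)
      ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
*/
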
4620 /*
4621     Special version for direct calls from Fortran
4622 */
4623 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4624 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4625 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4626 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4627 #endif
4628 
4629 /* Change these macros so they can be used in a void function */
4630 #undef CHKERRQ
4631 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4632 #undef SETERRQ2
4633 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4634 #undef SETERRQ
4635 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4636 
4637 EXTERN_C_BEGIN
4638 #undef __FUNCT__
4639 #define __FUNCT__ "matsetvaluesmpiaij_"
4640 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4641 {
4642   Mat            mat = *mmat;
4643   PetscInt       m = *mm, n = *mn;
4644   InsertMode     addv = *maddv;
4645   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4646   PetscScalar    value;
4647   PetscErrorCode ierr;
4648 
4649   MatPreallocated(mat);
4650   if (mat->insertmode == NOT_SET_VALUES) {
4651     mat->insertmode = addv;
4652   }
4653 #if defined(PETSC_USE_DEBUG)
4654   else if (mat->insertmode != addv) {
4655     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4656   }
4657 #endif
4658   {
4659   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4660   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4661   PetscTruth     roworiented = aij->roworiented;
4662 
4663   /* Some variables required by the macros */
4664   Mat            A = aij->A;
4665   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4666   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4667   PetscScalar    *aa = a->a;
4668   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4669   Mat            B = aij->B;
4670   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4671   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4672   PetscScalar    *ba = b->a;
4673 
4674   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4675   PetscInt       nonew = a->nonew;
4676   PetscScalar    *ap1,*ap2;
4677 
4678   PetscFunctionBegin;
4679   for (i=0; i<m; i++) {
4680     if (im[i] < 0) continue;
4681 #if defined(PETSC_USE_DEBUG)
4682     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4683 #endif
4684     if (im[i] >= rstart && im[i] < rend) {
4685       row      = im[i] - rstart;
4686       lastcol1 = -1;
4687       rp1      = aj + ai[row];
4688       ap1      = aa + ai[row];
4689       rmax1    = aimax[row];
4690       nrow1    = ailen[row];
4691       low1     = 0;
4692       high1    = nrow1;
4693       lastcol2 = -1;
4694       rp2      = bj + bi[row];
4695       ap2      = ba + bi[row];
4696       rmax2    = bimax[row];
4697       nrow2    = bilen[row];
4698       low2     = 0;
4699       high2    = nrow2;
4700 
4701       for (j=0; j<n; j++) {
4702         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4703         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4704         if (in[j] >= cstart && in[j] < cend){
4705           col = in[j] - cstart;
4706           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4707         } else if (in[j] < 0) continue;
4708 #if defined(PETSC_USE_DEBUG)
4709         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4710 #endif
4711         else {
4712           if (mat->was_assembled) {
4713             if (!aij->colmap) {
4714               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4715             }
4716 #if defined (PETSC_USE_CTABLE)
4717             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4718 	    col--;
4719 #else
4720             col = aij->colmap[in[j]] - 1;
4721 #endif
4722             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4723               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4724               col =  in[j];
4725               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4726               B = aij->B;
4727               b = (Mat_SeqAIJ*)B->data;
4728               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4729               rp2      = bj + bi[row];
4730               ap2      = ba + bi[row];
4731               rmax2    = bimax[row];
4732               nrow2    = bilen[row];
4733               low2     = 0;
4734               high2    = nrow2;
4735               bm       = aij->B->rmap.n;
4736               ba = b->a;
4737             }
4738           } else col = in[j];
4739           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4740         }
4741       }
4742     } else {
4743       if (!aij->donotstash) {
4744         if (roworiented) {
4745           if (ignorezeroentries && v[i*n] == 0.0) continue;
4746           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4747         } else {
4748           if (ignorezeroentries && v[i] == 0.0) continue;
4749           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4750         }
4751       }
4752     }
4753   }}
4754   PetscFunctionReturnVoid();
4755 }
4756 EXTERN_C_END
4757 
4758