xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision db268bfa9c46c24c134d4e2e1d78f27d60e1123e)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local column number in the off-diagonal part (the B block) of the
9 local storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
10 at a slightly higher hash-table lookup cost; without it, it is not scalable (each
11 processor holds an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
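
/*
   Illustrative sketch (not part of the original source): how the colmap built by the
   routine above is typically consulted to translate a global column index into a local
   column index of the off-diagonal block B.  It mirrors the lookup done later in
   MatSetValues_MPIAIJ(); the helper name GlobalToLocalColumn_Sketch is hypothetical.
   Excluded from compilation.
*/
#if 0
static PetscErrorCode GlobalToLocalColumn_Sketch(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!aij->colmap) {
    ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
  }
#if defined (PETSC_USE_CTABLE)
  /* keys and values are stored shifted by one so that 0 can mean "not found" */
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;
#else
  *lcol = aij->colmap[gcol] - 1;   /* a result of -1 means gcol is not a column of B */
#endif
  PetscFunctionReturn(0);
}
#endif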
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
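
/*
   Illustrative sketch (not part of the original source) of the insertion scheme the two
   macros above implement: narrow the search window with a short binary search, scan
   linearly for the column, and on a miss shift the tail of the row up one slot to make
   room.  It is simplified: the reallocation (MatSeqXAIJReallocateAIJ), the new-nonzero
   error checks, and the ignore-zero-entries option are omitted, and plain arrays stand
   in for the AIJ row storage.  The helper name is hypothetical; excluded from compilation.
*/
#if 0
static void InsertIntoSortedRow_Sketch(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,
                                       PetscInt col,PetscScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;

  while (high-low > 5) {                    /* binary search down to a small window */
    t = (low+high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {
    if (cols[i] > col) break;               /* insertion point found */
    if (cols[i] == col) {                   /* column already present */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {           /* shift later entries up one slot */
    cols[ii+1] = cols[ii];
    vals[ii+1] = vals[ii];
  }
  cols[i] = col;
  vals[i] = value;
  (*nrow)++;
}
#endif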
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some variables required by the macros above */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199             col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
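
/*
   Illustrative sketch (not part of the original source): typical application-level use of
   the routine above, assembling a tridiagonal MPIAIJ matrix of global size n.  Every row
   set here is locally owned, so the values go straight into the A (diagonal) and B
   (off-diagonal) blocks; entries for rows owned by another process would instead be
   gathered in the stash and communicated during MatAssemblyBegin/End.  The function name
   and the value of n are hypothetical; excluded from compilation.
*/
#if 0
static PetscErrorCode BuildTridiagonalExample_Sketch(MPI_Comm comm,PetscInt n,Mat *newmat)
{
  Mat            C;
  PetscInt       rstart,rend,r,nc,cols[3];
  PetscScalar    vals[3];
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&C);CHKERRQ(ierr);
  ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(C,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
  for (r=rstart; r<rend; r++) {
    nc = 0;
    if (r > 0)   {cols[nc] = r-1; vals[nc] = -1.0; nc++;}
    cols[nc] = r; vals[nc] = 2.0; nc++;
    if (r < n-1) {cols[nc] = r+1; vals[nc] = -1.0; nc++;}
    ierr = MatSetValues(C,1,&r,nc,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = C;
  PetscFunctionReturn(0);
}
#endif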
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col --;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col,other_disassembled;
319   PetscScalar    *val;
320   InsertMode     addv = mat->insertmode;
321 
322   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled; if so we must
346      also disassemble ourselves, in order that we may reassemble. */
347   /*
348      if the nonzero structure of the submatrix B cannot change, then no
349      processor disassembled, so we can skip this check
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
359   }
360   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
361   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
364 
365   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
366   aij->rowvalues = 0;
367 
368   /* used by MatAXPY() */
369   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
371 
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* +1 avoids a zero-length malloc when N is 0 */
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length */
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal, we use separate
502      code so that MatSetValues() is not called for each diagonal entry,
503      which would allocate new memory on every call (many mallocs) and slow things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
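
/*
   Illustrative sketch (not part of the original source): typical use of the routine above
   to impose Dirichlet boundary conditions.  Each process lists, in global numbering, the
   rows it wants zeroed (they need not be locally owned, since the routine routes each row
   to its owning process) and the diagonal entry of every zeroed row is set to 1.0.  Here
   only the owners list the first and last global rows; all processes must call since the
   routine communicates.  The function name is hypothetical; excluded from compilation.
*/
#if 0
static PetscErrorCode ZeroOwnedBoundaryRows_Sketch(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,M,nzero = 0,rows[2];

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  if (rstart == 0) rows[nzero++] = 0;     /* this process owns the first global row */
  if (rend == M)   rows[nzero++] = M-1;   /* this process owns the last global row */
  ierr = MatZeroRows(A,nzero,rows,1.0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif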
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
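
/*
   Illustrative sketch (not part of the original source): applying the routine above from
   application code.  The input vector must share the matrix column layout and the output
   vector the row layout; internally the ghost values of xx are scattered, the diagonal
   block is applied to the local part, and the off-diagonal block is applied to the ghosts
   and added in.  The function name is hypothetical; excluded from compilation.
*/
#if 0
static PetscErrorCode MultByOnes_Sketch(Mat A)
{
  PetscErrorCode ierr;
  Vec            x,y;
  PetscInt       m,n,M,N;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = VecCreateMPI(A->comm,n,N,&x);CHKERRQ(ierr);   /* matches the column layout */
  ierr = VecCreateMPI(A->comm,m,M,&y);CHKERRQ(ierr);   /* matches the row layout */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);                  /* y = A_diag*x_local + B_off*x_ghost */
  ierr = VecDestroy(x);CHKERRQ(ierr);
  ierr = VecDestroy(y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif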
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added into yy until the VecScatterEnd() on the next line */
594     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
600     /* values actually were received in the Begin() but we need to call this nop */
601     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 EXTERN_C_BEGIN
607 #undef __FUNCT__
608 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610 {
611   MPI_Comm       comm;
612   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614   IS             Me,Notme;
615   PetscErrorCode ierr;
616   PetscInt       M,N,first,last,*notme,i;
617   PetscMPIInt    size;
618 
619   PetscFunctionBegin;
620 
621   /* Easy test: symmetric diagonal block */
622   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
624   if (!*f) PetscFunctionReturn(0);
625   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
627   if (size == 1) PetscFunctionReturn(0);
628 
629   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
631   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
632   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
633   for (i=0; i<first; i++) notme[i] = i;
634   for (i=last; i<M; i++) notme[i-last+first] = i;
635   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
636   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
637   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
638   Aoff = Aoffs[0];
639   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
640   Boff = Boffs[0];
641   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
642   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
644   ierr = ISDestroy(Me);CHKERRQ(ierr);
645   ierr = ISDestroy(Notme);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatDestroy_MPIAIJ"
705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706 {
707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711 #if defined(PETSC_USE_LOG)
712   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713 #endif
714   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
715   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
717 #if defined (PETSC_USE_CTABLE)
718   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
719 #else
720   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
721 #endif
722   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
723   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
724   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
725   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
726   ierr = PetscFree(aij);CHKERRQ(ierr);
727 
728   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
729   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatView_MPIAIJ_Binary"
741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
742 {
743   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
744   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
745   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
746   PetscErrorCode    ierr;
747   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
748   int               fd;
749   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
750   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
751   PetscScalar       *column_values;
752 
753   PetscFunctionBegin;
754   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
756   nz   = A->nz + B->nz;
757   if (!rank) {
758     header[0] = MAT_FILE_COOKIE;
759     header[1] = mat->rmap.N;
760     header[2] = mat->cmap.N;
761     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
762     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
763     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
764     /* get largest number of rows any processor has */
765     rlen = mat->rmap.n;
766     range = mat->rmap.range;
767     for (i=1; i<size; i++) {
768       rlen = PetscMax(rlen,range[i+1] - range[i]);
769     }
770   } else {
771     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     rlen = mat->rmap.n;
773   }
774 
775   /* load up the local row counts */
776   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
777   for (i=0; i<mat->rmap.n; i++) {
778     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
779   }
780 
781   /* store the row lengths to the file */
782   if (!rank) {
783     MPI_Status status;
784     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
785     for (i=1; i<size; i++) {
786       rlen = range[i+1] - range[i];
787       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
788       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     }
790   } else {
791     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
792   }
793   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
794 
795   /* load up the local column indices */
796   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
797   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
798   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
799   cnt  = 0;
800   for (i=0; i<mat->rmap.n; i++) {
801     for (j=B->i[i]; j<B->i[i+1]; j++) {
802       if ( (col = garray[B->j[j]]) > cstart) break;
803       column_indices[cnt++] = col;
804     }
805     for (k=A->i[i]; k<A->i[i+1]; k++) {
806       column_indices[cnt++] = A->j[k] + cstart;
807     }
808     for (; j<B->i[i+1]; j++) {
809       column_indices[cnt++] = garray[B->j[j]];
810     }
811   }
812   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
813 
814   /* store the column indices to the file */
815   if (!rank) {
816     MPI_Status status;
817     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     for (i=1; i<size; i++) {
819       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
820       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
821       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
823     }
824   } else {
825     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
826     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827   }
828   ierr = PetscFree(column_indices);CHKERRQ(ierr);
829 
830   /* load up the local column values */
831   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
832   cnt  = 0;
833   for (i=0; i<mat->rmap.n; i++) {
834     for (j=B->i[i]; j<B->i[i+1]; j++) {
835       if ( garray[B->j[j]] > cstart) break;
836       column_values[cnt++] = B->a[j];
837     }
838     for (k=A->i[i]; k<A->i[i+1]; k++) {
839       column_values[cnt++] = A->a[k];
840     }
841     for (; j<B->i[i+1]; j++) {
842       column_values[cnt++] = B->a[j];
843     }
844   }
845   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
846 
847   /* store the column values to the file */
848   if (!rank) {
849     MPI_Status status;
850     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
851     for (i=1; i<size; i++) {
852       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
853       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
854       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
856     }
857   } else {
858     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
859     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(column_values);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
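
/*
   Illustrative sketch (not part of the original source): the layout of the binary stream
   written by the routine above and how a reader might pull it back in with
   PetscBinaryRead() from an already-open descriptor fd (for example one obtained via
   PetscViewerBinaryGetDescriptor()).  Short-read handling and freeing of the arrays are
   omitted.  The function name is hypothetical; excluded from compilation.

   Stream layout:
       header[4]      = {MAT_FILE_COOKIE, M, N, total nonzeros nz}
       row lengths    : M   PetscInt
       column indices : nz  PetscInt    (global indices, stored row by row)
       values         : nz  PetscScalar (same ordering as the column indices)
*/
#if 0
static PetscErrorCode ReadAIJBinary_Sketch(int fd,PetscInt *M,PetscInt *N,PetscInt *nz,
                                           PetscInt **rowlens,PetscInt **colidx,PetscScalar **vals)
{
  PetscErrorCode ierr;
  PetscInt       header[4];

  PetscFunctionBegin;
  ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix file");
  *M = header[1]; *N = header[2]; *nz = header[3];
  ierr = PetscMalloc((*M+1)*sizeof(PetscInt),rowlens);CHKERRQ(ierr);
  ierr = PetscBinaryRead(fd,*rowlens,*M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscMalloc((*nz+1)*sizeof(PetscInt),colidx);CHKERRQ(ierr);
  ierr = PetscBinaryRead(fd,*colidx,*nz,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscMalloc((*nz+1)*sizeof(PetscScalar),vals);CHKERRQ(ierr);
  ierr = PetscBinaryRead(fd,*vals,*nz,PETSC_SCALAR);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif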
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
868 {
869   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
870   PetscErrorCode    ierr;
871   PetscMPIInt       rank = aij->rank,size = aij->size;
872   PetscTruth        isdraw,iascii,isbinary;
873   PetscViewer       sviewer;
874   PetscViewerFormat format;
875 
876   PetscFunctionBegin;
877   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
880   if (iascii) {
881     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
882     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
883       MatInfo    info;
884       PetscTruth inodes;
885 
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
889       if (!inodes) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
891 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
894 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscInt   inodecount,inodelimit,*inodes;
905       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
906       if (inodes) {
907         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
908       } else {
909         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
910       }
911       PetscFunctionReturn(0);
912     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
913       PetscFunctionReturn(0);
914     }
915   } else if (isbinary) {
916     if (size == 1) {
917       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919     } else {
920       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
921     }
922     PetscFunctionReturn(0);
923   } else if (isdraw) {
924     PetscDraw  draw;
925     PetscTruth isnull;
926     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
927     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
928   }
929 
930   if (size == 1) {
931     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
932     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
933   } else {
934     /* assemble the entire matrix onto first processor. */
935     Mat         A;
936     Mat_SeqAIJ  *Aloc;
937     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
938     PetscScalar *a;
939 
940     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
941     if (!rank) {
942       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
943     } else {
944       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = mat->rmap.rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = mat->rmap.rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Everyone has to call to draw the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth     iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatRelax_MPIAIJ"
1014 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1015 {
1016   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1017   PetscErrorCode ierr;
1018   Vec            bb1;
1019 
1020   PetscFunctionBegin;
1021   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1022 
1023   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1024 
1025   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1026     if (flag & SOR_ZERO_INITIAL_GUESS) {
1027       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1028       its--;
1029     }
1030 
1031     while (its--) {
1032       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1033       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1034 
1035       /* update rhs: bb1 = bb - B*x */
1036       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1037       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1038 
1039       /* local sweep */
1040       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1041       CHKERRQ(ierr);
1042     }
1043   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1044     if (flag & SOR_ZERO_INITIAL_GUESS) {
1045       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1046       its--;
1047     }
1048     while (its--) {
1049       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1050       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1051 
1052       /* update rhs: bb1 = bb - B*x */
1053       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1054       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1055 
1056       /* local sweep */
1057       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1058       CHKERRQ(ierr);
1059     }
1060   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1061     if (flag & SOR_ZERO_INITIAL_GUESS) {
1062       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1063       its--;
1064     }
1065     while (its--) {
1066       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1067       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068 
1069       /* update rhs: bb1 = bb - B*x */
1070       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1071       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1072 
1073       /* local sweep */
1074       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1075       CHKERRQ(ierr);
1076     }
1077   } else {
1078     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1079   }
1080 
1081   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatPermute_MPIAIJ"
1087 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1088 {
1089   MPI_Comm       comm,pcomm;
1090   PetscInt       first,local_size,nrows,*rows;
1091   int            ntids;
1092   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1093   PetscErrorCode ierr;
1094 
1095   PetscFunctionBegin;
1096   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1097   /* make a collective version of 'rowp' */
1098   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1099   if (pcomm==comm) {
1100     crowp = rowp;
1101   } else {
1102     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1103     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1104     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1105     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1106   }
1107   /* collect the global row permutation and invert it */
1108   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1109   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1110   if (pcomm!=comm) {
1111     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1112   }
1113   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1114   /* get the local target indices */
1115   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1116   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1117   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1118   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1119   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1121   /* the column permutation is so much easier;
1122      make a local version of 'colp' and invert it */
1123   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1124   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1125   if (ntids==1) {
1126     lcolp = colp;
1127   } else {
1128     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1129     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1130     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1131   }
1132   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1133   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1134   if (ntids>1) {
1135     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1136     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1137   }
1138   /* now we just get the submatrix */
1139   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1140   /* clean up */
1141   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1142   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1148 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1149 {
1150   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1151   Mat            A = mat->A,B = mat->B;
1152   PetscErrorCode ierr;
1153   PetscReal      isend[5],irecv[5];
1154 
1155   PetscFunctionBegin;
1156   info->block_size     = 1.0;
1157   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1158   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1159   isend[3] = info->memory;  isend[4] = info->mallocs;
1160   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1161   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1162   isend[3] += info->memory;  isend[4] += info->mallocs;
1163   if (flag == MAT_LOCAL) {
1164     info->nz_used      = isend[0];
1165     info->nz_allocated = isend[1];
1166     info->nz_unneeded  = isend[2];
1167     info->memory       = isend[3];
1168     info->mallocs      = isend[4];
1169   } else if (flag == MAT_GLOBAL_MAX) {
1170     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1171     info->nz_used      = irecv[0];
1172     info->nz_allocated = irecv[1];
1173     info->nz_unneeded  = irecv[2];
1174     info->memory       = irecv[3];
1175     info->mallocs      = irecv[4];
1176   } else if (flag == MAT_GLOBAL_SUM) {
1177     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1178     info->nz_used      = irecv[0];
1179     info->nz_allocated = irecv[1];
1180     info->nz_unneeded  = irecv[2];
1181     info->memory       = irecv[3];
1182     info->mallocs      = irecv[4];
1183   }
1184   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1185   info->fill_ratio_needed = 0;
1186   info->factor_mallocs    = 0;
1187   info->rows_global       = (double)matin->rmap.N;
1188   info->columns_global    = (double)matin->cmap.N;
1189   info->rows_local        = (double)matin->rmap.n;
1190   info->columns_local     = (double)matin->cmap.N;
1191 
1192   PetscFunctionReturn(0);
1193 }
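
/*
   Illustrative sketch (not part of the original source): querying the routine above from
   application code to report nonzero storage summed across the communicator.  The
   function name is hypothetical; excluded from compilation.
*/
#if 0
static PetscErrorCode ReportGlobalStorage_Sketch(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(A->comm,"nonzeros used %g, allocated %g, mallocs during insertion %g\n",
                     info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif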
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatSetOption_MPIAIJ"
1197 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1198 {
1199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   switch (op) {
1204   case MAT_NO_NEW_NONZERO_LOCATIONS:
1205   case MAT_YES_NEW_NONZERO_LOCATIONS:
1206   case MAT_COLUMNS_UNSORTED:
1207   case MAT_COLUMNS_SORTED:
1208   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1209   case MAT_KEEP_ZEROED_ROWS:
1210   case MAT_NEW_NONZERO_LOCATION_ERR:
1211   case MAT_USE_INODES:
1212   case MAT_DO_NOT_USE_INODES:
1213   case MAT_IGNORE_ZERO_ENTRIES:
1214     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1215     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1216     break;
1217   case MAT_ROW_ORIENTED:
1218     a->roworiented = PETSC_TRUE;
1219     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1220     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1221     break;
1222   case MAT_ROWS_SORTED:
1223   case MAT_ROWS_UNSORTED:
1224   case MAT_YES_NEW_DIAGONALS:
1225     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1226     break;
1227   case MAT_COLUMN_ORIENTED:
1228     a->roworiented = PETSC_FALSE;
1229     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1230     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1231     break;
1232   case MAT_IGNORE_OFF_PROC_ENTRIES:
1233     a->donotstash = PETSC_TRUE;
1234     break;
1235   case MAT_NO_NEW_DIAGONALS:
1236     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1237   case MAT_SYMMETRIC:
1238     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1239     break;
1240   case MAT_STRUCTURALLY_SYMMETRIC:
1241   case MAT_HERMITIAN:
1242   case MAT_SYMMETRY_ETERNAL:
1243     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1244     break;
1245   case MAT_NOT_SYMMETRIC:
1246   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1247   case MAT_NOT_HERMITIAN:
1248   case MAT_NOT_SYMMETRY_ETERNAL:
1249     break;
1250   default:
1251     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1252   }
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatGetRow_MPIAIJ"
1258 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1259 {
1260   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1261   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1262   PetscErrorCode ierr;
1263   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1264   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1265   PetscInt       *cmap,*idx_p;
1266 
1267   PetscFunctionBegin;
1268   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1269   mat->getrowactive = PETSC_TRUE;
1270 
1271   if (!mat->rowvalues && (idx || v)) {
1272     /*
1273         allocate enough space to hold information from the longest row.
1274     */
1275     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1276     PetscInt     max = 1,tmp;
1277     for (i=0; i<matin->rmap.n; i++) {
1278       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1279       if (max < tmp) { max = tmp; }
1280     }
1281     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1282     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1283   }
1284 
1285   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1286   lrow = row - rstart;
1287 
1288   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1289   if (!v)   {pvA = 0; pvB = 0;}
1290   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1291   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1292   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1293   nztot = nzA + nzB;
1294 
1295   cmap  = mat->garray;
1296   if (v  || idx) {
1297     if (nztot) {
1298       /* Sort by increasing column numbers, assuming A and B already sorted */
1299       PetscInt imark = -1;
1300       if (v) {
1301         *v = v_p = mat->rowvalues;
1302         for (i=0; i<nzB; i++) {
1303           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1304           else break;
1305         }
1306         imark = i;
1307         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1308         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1309       }
1310       if (idx) {
1311         *idx = idx_p = mat->rowindices;
1312         if (imark > -1) {
1313           for (i=0; i<imark; i++) {
1314             idx_p[i] = cmap[cworkB[i]];
1315           }
1316         } else {
1317           for (i=0; i<nzB; i++) {
1318             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1319             else break;
1320           }
1321           imark = i;
1322         }
1323         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1324         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1325       }
1326     } else {
1327       if (idx) *idx = 0;
1328       if (v)   *v   = 0;
1329     }
1330   }
1331   *nz = nztot;
1332   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1333   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
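
/*
   Illustrative sketch (not part of the original source): traversing the locally owned
   rows with the routine above.  Only local rows may be requested (as the range check
   above enforces), and each MatGetRow() must be paired with MatRestoreRow() before the
   next row is fetched.  The function name is hypothetical; excluded from compilation.
*/
#if 0
static PetscErrorCode TraverseLocalRows_Sketch(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       r,ncols,*cols,rstart,rend;
  PetscScalar    *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  for (r=rstart; r<rend; r++) {
    ierr = MatGetRow(mat,r,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* cols[] holds global column indices in increasing order, vals[] the matching entries */
    ierr = MatRestoreRow(mat,r,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif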
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1339 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340 {
1341   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1342 
1343   PetscFunctionBegin;
1344   if (!aij->getrowactive) {
1345     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1346   }
1347   aij->getrowactive = PETSC_FALSE;
1348   PetscFunctionReturn(0);
1349 }
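/*
   Illustrative sketch (not part of this implementation): the expected calling
   pattern for the two row routines above.  Only locally owned rows may be
   requested, and each MatGetRow() must be matched by a MatRestoreRow() before
   the next row is accessed.  The variable names below are hypothetical.

       PetscInt    rstart,rend,row,ncols;
       PetscInt    *cols;
       PetscScalar *vals;
       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ... examine cols[0..ncols-1] and vals[0..ncols-1] here ...
         ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/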
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatNorm_MPIAIJ"
1353 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1354 {
1355   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1356   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1357   PetscErrorCode ierr;
1358   PetscInt       i,j,cstart = mat->cmap.rstart;
1359   PetscReal      sum = 0.0;
1360   PetscScalar    *v;
1361 
1362   PetscFunctionBegin;
1363   if (aij->size == 1) {
1364     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1365   } else {
1366     if (type == NORM_FROBENIUS) {
1367       v = amat->a;
1368       for (i=0; i<amat->nz; i++) {
1369 #if defined(PETSC_USE_COMPLEX)
1370         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1371 #else
1372         sum += (*v)*(*v); v++;
1373 #endif
1374       }
1375       v = bmat->a;
1376       for (i=0; i<bmat->nz; i++) {
1377 #if defined(PETSC_USE_COMPLEX)
1378         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1379 #else
1380         sum += (*v)*(*v); v++;
1381 #endif
1382       }
1383       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1384       *norm = sqrt(*norm);
1385     } else if (type == NORM_1) { /* max column norm */
1386       PetscReal *tmp,*tmp2;
1387       PetscInt    *jj,*garray = aij->garray;
1388       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1389       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1390       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1391       *norm = 0.0;
1392       v = amat->a; jj = amat->j;
1393       for (j=0; j<amat->nz; j++) {
1394         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1395       }
1396       v = bmat->a; jj = bmat->j;
1397       for (j=0; j<bmat->nz; j++) {
1398         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1399       }
1400       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1401       for (j=0; j<mat->cmap.N; j++) {
1402         if (tmp2[j] > *norm) *norm = tmp2[j];
1403       }
1404       ierr = PetscFree(tmp);CHKERRQ(ierr);
1405       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1406     } else if (type == NORM_INFINITY) { /* max row norm */
1407       PetscReal ntemp = 0.0;
1408       for (j=0; j<aij->A->rmap.n; j++) {
1409         v = amat->a + amat->i[j];
1410         sum = 0.0;
1411         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1412           sum += PetscAbsScalar(*v); v++;
1413         }
1414         v = bmat->a + bmat->i[j];
1415         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1416           sum += PetscAbsScalar(*v); v++;
1417         }
1418         if (sum > ntemp) ntemp = sum;
1419       }
1420       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1421     } else {
1422       SETERRQ(PETSC_ERR_SUP,"No support for the two norm of an MPIAIJ matrix");
1423     }
1424   }
1425   PetscFunctionReturn(0);
1426 }
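/*
   Illustrative sketch (not part of this implementation): requesting the norms
   supported by the routine above.  NORM_2 is not supported for parallel AIJ
   matrices and produces an error.

       PetscReal nrm1,nrmf,nrminf;
       ierr = MatNorm(mat,NORM_1,&nrm1);CHKERRQ(ierr);            largest column sum
       ierr = MatNorm(mat,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);    sqrt of the sum of squares of all entries
       ierr = MatNorm(mat,NORM_INFINITY,&nrminf);CHKERRQ(ierr);   largest row sum
*/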
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatTranspose_MPIAIJ"
1430 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1431 {
1432   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1433   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1434   PetscErrorCode ierr;
1435   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1436   Mat            B;
1437   PetscScalar    *array;
1438 
1439   PetscFunctionBegin;
1440   if (!matout && M != N) {
1441     SETERRQ(PETSC_ERR_ARG_SIZ,"In-place transpose is supported only for square matrices");
1442   }
1443 
1444   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1445   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1446   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1447   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1448 
1449   /* copy over the A part */
1450   Aloc = (Mat_SeqAIJ*)a->A->data;
1451   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1452   row = A->rmap.rstart;
1453   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1454   for (i=0; i<m; i++) {
1455     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1456     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1457   }
1458   aj = Aloc->j;
1459   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1460 
1461   /* copy over the B part */
1462   Aloc = (Mat_SeqAIJ*)a->B->data;
1463   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1464   row  = A->rmap.rstart;
1465   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1466   ct   = cols;
1467   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1468   for (i=0; i<m; i++) {
1469     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1470     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1471   }
1472   ierr = PetscFree(ct);CHKERRQ(ierr);
1473   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1474   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1475   if (matout) {
1476     *matout = B;
1477   } else {
1478     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1479   }
1480   PetscFunctionReturn(0);
1481 }
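/*
   Illustrative sketch (not part of this implementation), assuming the usual
   MatTranspose() calling sequence of this PETSc version:

       Mat At;
       ierr = MatTranspose(A,&At);CHKERRQ(ierr);          out-of-place: At = A^T
       ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);   in-place; A must be square
*/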
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1485 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1486 {
1487   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1488   Mat            a = aij->A,b = aij->B;
1489   PetscErrorCode ierr;
1490   PetscInt       s1,s2,s3;
1491 
1492   PetscFunctionBegin;
1493   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1494   if (rr) {
1495     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1496     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector has a non-conforming local size");
1497     /* Overlap communication with computation. */
1498     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1499   }
1500   if (ll) {
1501     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1502     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector has a non-conforming local size");
1503     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1504   }
1505   /* scale the diagonal block with both the left and right scalings */
1506   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1507 
1508   if (rr) {
1509     /* Do a scatter end and then right scale the off-diagonal block */
1510     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1511     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1512   }
1513 
1514   PetscFunctionReturn(0);
1515 }
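/*
   Illustrative sketch (not part of this implementation): MatDiagonalScale()
   computes mat = diag(ll)*mat*diag(rr); ll must conform to the local row
   layout and rr to the local column layout.  The vector names are hypothetical.

       ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr);           scale rows by ll and columns by rr
       ierr = MatDiagonalScale(mat,ll,PETSC_NULL);CHKERRQ(ierr);   row scaling only
*/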
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1519 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1520 {
1521   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1526   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1531 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1532 {
1533   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatEqual_MPIAIJ"
1543 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1544 {
1545   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1546   Mat            a,b,c,d;
1547   PetscTruth     flg;
1548   PetscErrorCode ierr;
1549 
1550   PetscFunctionBegin;
1551   a = matA->A; b = matA->B;
1552   c = matB->A; d = matB->B;
1553 
1554   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1555   if (flg) {
1556     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1557   }
1558   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatCopy_MPIAIJ"
1564 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1565 {
1566   PetscErrorCode ierr;
1567   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1568   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1569 
1570   PetscFunctionBegin;
1571   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1572   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1573     /* Because of the column compression in the off-processor part of the matrix a->B,
1574        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1575        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1576        could be provided by first uncompressing the a->B matrices and then copying the
1577        submatrices. */
1578     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1579   } else {
1580     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1581     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1582   }
1583   PetscFunctionReturn(0);
1584 }
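/*
   Illustrative sketch (not part of this implementation): the fast path above is
   taken only when B shares A's nonzero pattern, for example when B was obtained
   from A with MatDuplicate(); otherwise MatCopy_Basic() is used.

       Mat B;
       ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
       ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);          fast, block-by-block copy
       ierr = MatCopy(A,C,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);     general (slower) copy
*/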
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1588 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1589 {
1590   PetscErrorCode ierr;
1591 
1592   PetscFunctionBegin;
1593   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #include "petscblaslapack.h"
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatAXPY_MPIAIJ"
1600 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1601 {
1602   PetscErrorCode ierr;
1603   PetscInt       i;
1604   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1605   PetscBLASInt   bnz,one=1;
1606   Mat_SeqAIJ     *x,*y;
1607 
1608   PetscFunctionBegin;
1609   if (str == SAME_NONZERO_PATTERN) {
1610     PetscScalar alpha = a;
1611     x = (Mat_SeqAIJ *)xx->A->data;
1612     y = (Mat_SeqAIJ *)yy->A->data;
1613     bnz = (PetscBLASInt)x->nz;
1614     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1615     x = (Mat_SeqAIJ *)xx->B->data;
1616     y = (Mat_SeqAIJ *)yy->B->data;
1617     bnz = (PetscBLASInt)x->nz;
1618     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1619   } else if (str == SUBSET_NONZERO_PATTERN) {
1620     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1621 
1622     x = (Mat_SeqAIJ *)xx->B->data;
1623     y = (Mat_SeqAIJ *)yy->B->data;
1624     if (y->xtoy && y->XtoY != xx->B) {
1625       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1626       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1627     }
1628     if (!y->xtoy) { /* get xtoy */
1629       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1630       y->XtoY = xx->B;
1631       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1632     }
1633     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1634   } else {
1635     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1636   }
1637   PetscFunctionReturn(0);
1638 }
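/*
   Illustrative sketch (not part of this implementation): Y = a*X + Y.  The
   MatStructure flag selects the path above: SAME_NONZERO_PATTERN uses a direct
   BLAS axpy on the stored values, SUBSET_NONZERO_PATTERN builds (and caches) an
   index map from X's entries into Y, and anything else falls back to
   MatAXPY_Basic().

       PetscScalar alpha = 2.0;     hypothetical value
       ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/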
1639 
1640 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1641 
1642 #undef __FUNCT__
1643 #define __FUNCT__ "MatConjugate_MPIAIJ"
1644 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1645 {
1646 #if defined(PETSC_USE_COMPLEX)
1647   PetscErrorCode ierr;
1648   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1649 
1650   PetscFunctionBegin;
1651   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1652   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1653 #else
1654   PetscFunctionBegin;
1655 #endif
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 #undef __FUNCT__
1660 #define __FUNCT__ "MatRealPart_MPIAIJ"
1661 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1662 {
1663   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1664   PetscErrorCode ierr;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1668   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 #undef __FUNCT__
1673 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1674 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1675 {
1676   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1677   PetscErrorCode ierr;
1678 
1679   PetscFunctionBegin;
1680   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1681   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #ifdef PETSC_HAVE_PBGL
1686 
1687 #include <boost/parallel/mpi/bsp_process_group.hpp>
1688 #include <boost/graph/distributed/ilu_default_graph.hpp>
1689 #include <boost/graph/distributed/ilu_0_block.hpp>
1690 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1691 #include <boost/graph/distributed/petsc/interface.hpp>
1692 #include <boost/multi_array.hpp>
1693 #include <boost/parallel/distributed_property_map.hpp>
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1697 /*
1698   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1699 */
1700 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1701 {
1702   namespace petsc = boost::distributed::petsc;
1703 
1704   namespace graph_dist = boost::graph::distributed;
1705   using boost::graph::distributed::ilu_default::process_group_type;
1706   using boost::graph::ilu_permuted;
1707 
1708   PetscTruth      row_identity, col_identity;
1709   PetscContainer  c;
1710   PetscInt        m, n, M, N;
1711   PetscErrorCode  ierr;
1712 
1713   PetscFunctionBegin;
1714   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1715   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1716   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1717   if (!row_identity || !col_identity) {
1718     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1719   }
1720 
1721   process_group_type pg;
1722   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1723   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1724   lgraph_type&   level_graph = *lgraph_p;
1725   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1726 
1727   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1728   ilu_permuted(level_graph);
1729 
1730   /* put together the new matrix */
1731   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1732   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1733   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1734   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1735   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1736   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1737   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1738   (*fact)->factor = FACTOR_LU;
1739 
1740   ierr = PetscContainerCreate(A->comm, &c);CHKERRQ(ierr);
1741   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1742   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1748 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1749 {
1750   PetscFunctionBegin;
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatSolve_MPIAIJ"
1756 /*
1757   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1758 */
1759 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1760 {
1761   namespace graph_dist = boost::graph::distributed;
1762 
1763   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1764   lgraph_type*   lgraph_p;
1765   PetscContainer c;
1766   PetscErrorCode ierr;
1767 
1768   PetscFunctionBegin;
1769   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1770   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1771   ierr = VecCopy(b, x); CHKERRQ(ierr);
1772 
1773   PetscScalar* array_x;
1774   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1775   PetscInt sx;
1776   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1777 
1778   PetscScalar* array_b;
1779   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1780   PetscInt sb;
1781   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1782 
1783   lgraph_type&   level_graph = *lgraph_p;
1784   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1785 
1786   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1787   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1788                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1789 
1790   typedef boost::iterator_property_map<array_ref_type::iterator,
1791                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1792   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1793                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1794 
1795   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1796 
1797   PetscFunctionReturn(0);
1798 }
1799 #endif
1800 
1801 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1802   PetscInt       nzlocal,nsends,nrecvs;
1803   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
1804   PetscScalar    *sbuf_a,**rbuf_a;
1805   PetscErrorCode (*MatDestroy)(Mat);
1806 } Mat_Redundant;
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1810 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1811 {
1812   PetscErrorCode       ierr;
1813   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1814   PetscInt             i;
1815 
1816   PetscFunctionBegin;
1817   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1818   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1819   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1820   for (i=0; i<redund->nrecvs; i++){
1821     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1822     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1823   }
1824   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1825   ierr = PetscFree(redund);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatDestroy_MatRedundant"
1831 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1832 {
1833   PetscErrorCode  ierr;
1834   PetscContainer  container;
1835   Mat_Redundant   *redund=PETSC_NULL;
1836 
1837   PetscFunctionBegin;
1838   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1839   if (container) {
1840     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1841   } else {
1842     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1843   }
1844   A->ops->destroy = redund->MatDestroy;
1845   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1846   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1847   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1853 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1854 {
1855   PetscMPIInt    rank,size;
1856   MPI_Comm       comm=mat->comm;
1857   PetscErrorCode ierr;
1858   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1859   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1860   PetscInt       *rowrange=mat->rmap.range;
1861   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1862   Mat            A=aij->A,B=aij->B,C=*matredundant;
1863   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1864   PetscScalar    *sbuf_a;
1865   PetscInt       nzlocal=a->nz+b->nz;
1866   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1867   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1868   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1869   PetscScalar    *vals,*aworkA,*aworkB;
1870   PetscMPIInt    tag1,tag2,tag3,imdex;
1871   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1872                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1873   MPI_Status     recv_status,*send_status;
1874   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1875   PetscInt       **rbuf_j=PETSC_NULL;
1876   PetscScalar    **rbuf_a=PETSC_NULL;
1877   Mat_Redundant  *redund=PETSC_NULL;
1878   PetscContainer container;
1879 
1880   PetscFunctionBegin;
1881   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1882   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1883 
1884   if (reuse == MAT_REUSE_MATRIX) {
1885     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1886     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1887     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1888     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1889     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1890     if (container) {
1891       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1892     } else {
1893       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1894     }
1895     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1896 
1897     nsends    = redund->nsends;
1898     nrecvs    = redund->nrecvs;
1899     send_rank = redund->send_rank; recv_rank = send_rank + size;
1900     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1901     sbuf_j    = redund->sbuf_j;
1902     sbuf_a    = redund->sbuf_a;
1903     rbuf_j    = redund->rbuf_j;
1904     rbuf_a    = redund->rbuf_a;
1905   }
1906 
1907   if (reuse == MAT_INITIAL_MATRIX){
1908     PetscMPIInt  subrank,subsize;
1909     PetscInt     nleftover,np_subcomm;
1910     /* get the destination processors' id send_rank, nsends and nrecvs */
1911     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1912     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1913     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
1914     recv_rank = send_rank + size;
1915     np_subcomm = size/nsubcomm;
1916     nleftover  = size - nsubcomm*np_subcomm;
1917     nsends = 0; nrecvs = 0;
1918     for (i=0; i<size; i++){ /* i = rank */
1919       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1920         send_rank[nsends] = i; nsends++;
1921         recv_rank[nrecvs++] = i;
1922       }
1923     }
1924     if (rank >= size - nleftover){/* this proc is a leftover processor */
1925       i = size-nleftover-1;
1926       j = 0;
1927       while (j < nsubcomm - nleftover){
1928         send_rank[nsends++] = i;
1929         i--; j++;
1930       }
1931     }
1932 
1933     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1934       for (i=0; i<nleftover; i++){
1935         recv_rank[nrecvs++] = size-nleftover+i;
1936       }
1937     }
1938 
1939     /* allocate sbuf_j, sbuf_a */
1940     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1941     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1942     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1943   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1944 
1945   /* copy mat's local entries into the buffers */
1946   if (reuse == MAT_INITIAL_MATRIX){
1947     rownz_max = 0;
1948     rptr = sbuf_j;
1949     cols = sbuf_j + rend-rstart + 1;
1950     vals = sbuf_a;
1951     rptr[0] = 0;
1952     for (i=0; i<rend-rstart; i++){
1953       row = i + rstart;
1954       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1955       ncols  = nzA + nzB;
1956       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1957       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1958       /* load the column indices for this row into cols */
1959       lwrite = 0;
1960       for (l=0; l<nzB; l++) {
1961         if ((ctmp = bmap[cworkB[l]]) < cstart){
1962           vals[lwrite]   = aworkB[l];
1963           cols[lwrite++] = ctmp;
1964         }
1965       }
1966       for (l=0; l<nzA; l++){
1967         vals[lwrite]   = aworkA[l];
1968         cols[lwrite++] = cstart + cworkA[l];
1969       }
1970       for (l=0; l<nzB; l++) {
1971         if ((ctmp = bmap[cworkB[l]]) >= cend){
1972           vals[lwrite]   = aworkB[l];
1973           cols[lwrite++] = ctmp;
1974         }
1975       }
1976       vals += ncols;
1977       cols += ncols;
1978       rptr[i+1] = rptr[i] + ncols;
1979       if (rownz_max < ncols) rownz_max = ncols;
1980     }
1981     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1982   } else { /* only copy matrix values into sbuf_a */
1983     rptr = sbuf_j;
1984     vals = sbuf_a;
1985     rptr[0] = 0;
1986     for (i=0; i<rend-rstart; i++){
1987       row = i + rstart;
1988       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1989       ncols  = nzA + nzB;
1990       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1991       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1992       lwrite = 0;
1993       for (l=0; l<nzB; l++) {
1994         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1995       }
1996       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1997       for (l=0; l<nzB; l++) {
1998         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1999       }
2000       vals += ncols;
2001       rptr[i+1] = rptr[i] + ncols;
2002     }
2003   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2004 
2005   /* send nzlocal to others, and recv other's nzlocal */
2006   /*--------------------------------------------------*/
2007   if (reuse == MAT_INITIAL_MATRIX){
2008     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2009     s_waits2 = s_waits3 + nsends;
2010     s_waits1 = s_waits2 + nsends;
2011     r_waits1 = s_waits1 + nsends;
2012     r_waits2 = r_waits1 + nrecvs;
2013     r_waits3 = r_waits2 + nrecvs;
2014   } else {
2015     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2016     r_waits3 = s_waits3 + nsends;
2017   }
2018 
2019   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2020   if (reuse == MAT_INITIAL_MATRIX){
2021     /* get new tags to keep the communication clean */
2022     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2023     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2024     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2025     rbuf_nz = sbuf_nz + nsends;
2026 
2027     /* post receives of other's nzlocal */
2028     for (i=0; i<nrecvs; i++){
2029       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2030     }
2031     /* send nzlocal to others */
2032     for (i=0; i<nsends; i++){
2033       sbuf_nz[i] = nzlocal;
2034       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2035     }
2036     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2037     count = nrecvs;
2038     while (count) {
2039       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2040       recv_rank[imdex] = recv_status.MPI_SOURCE;
2041       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2042       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2043 
2044       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2045       rbuf_nz[imdex] += i + 2;
2046       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2047       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2048       count--;
2049     }
2050     /* wait on sends of nzlocal */
2051     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2052     /* send mat->i,j to others, and recv from other's */
2053     /*------------------------------------------------*/
2054     for (i=0; i<nsends; i++){
2055       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2056       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2057     }
2058     /* wait on receives of mat->i,j */
2059     /*------------------------------*/
2060     count = nrecvs;
2061     while (count) {
2062       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2063       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2064       count--;
2065     }
2066     /* wait on sends of mat->i,j */
2067     /*---------------------------*/
2068     if (nsends) {
2069       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2070     }
2071   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2072 
2073   /* post receives, send and receive mat->a */
2074   /*----------------------------------------*/
2075   for (imdex=0; imdex<nrecvs; imdex++) {
2076     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2077   }
2078   for (i=0; i<nsends; i++){
2079     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2080   }
2081   count = nrecvs;
2082   while (count) {
2083     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2084     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2085     count--;
2086   }
2087   if (nsends) {
2088     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2089   }
2090 
2091   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2092 
2093   /* create redundant matrix */
2094   /*-------------------------*/
2095   if (reuse == MAT_INITIAL_MATRIX){
2096     /* compute rownz_max for preallocation */
2097     for (imdex=0; imdex<nrecvs; imdex++){
2098       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2099       rptr = rbuf_j[imdex];
2100       for (i=0; i<j; i++){
2101         ncols = rptr[i+1] - rptr[i];
2102         if (rownz_max < ncols) rownz_max = ncols;
2103       }
2104     }
2105 
2106     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2107     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2108     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2109     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2110     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2111   } else {
2112     C = *matredundant;
2113   }
2114 
2115   /* insert local matrix entries */
2116   rptr = sbuf_j;
2117   cols = sbuf_j + rend-rstart + 1;
2118   vals = sbuf_a;
2119   for (i=0; i<rend-rstart; i++){
2120     row   = i + rstart;
2121     ncols = rptr[i+1] - rptr[i];
2122     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2123     vals += ncols;
2124     cols += ncols;
2125   }
2126   /* insert received matrix entries */
2127   for (imdex=0; imdex<nrecvs; imdex++){
2128     rstart = rowrange[recv_rank[imdex]];
2129     rend   = rowrange[recv_rank[imdex]+1];
2130     rptr = rbuf_j[imdex];
2131     cols = rbuf_j[imdex] + rend-rstart + 1;
2132     vals = rbuf_a[imdex];
2133     for (i=0; i<rend-rstart; i++){
2134       row   = i + rstart;
2135       ncols = rptr[i+1] - rptr[i];
2136       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2137       vals += ncols;
2138       cols += ncols;
2139     }
2140   }
2141   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2142   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2143   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2144   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2145   if (reuse == MAT_INITIAL_MATRIX){
2146     PetscContainer container;
2147     *matredundant = C;
2148     /* create a supporting struct and attach it to C for reuse */
2149     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
2150     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2151     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2152     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2153     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2154 
2155     redund->nzlocal = nzlocal;
2156     redund->nsends  = nsends;
2157     redund->nrecvs  = nrecvs;
2158     redund->send_rank = send_rank;
2159     redund->sbuf_nz = sbuf_nz;
2160     redund->sbuf_j  = sbuf_j;
2161     redund->sbuf_a  = sbuf_a;
2162     redund->rbuf_j  = rbuf_j;
2163     redund->rbuf_a  = rbuf_a;
2164 
2165     redund->MatDestroy = C->ops->destroy;
2166     C->ops->destroy    = MatDestroy_MatRedundant;
2167   }
2168   PetscFunctionReturn(0);
2169 }
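/*
   Illustrative sketch (not part of this implementation; the exact calling
   sequence of the public interface should be checked against its manual page):
   gather the whole parallel matrix redundantly onto each of nsubcomm
   subcommunicators, then reuse the cached communication pattern when only the
   numerical values change.

       Mat Cred = PETSC_NULL;
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
       ... matrix values change ...
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
*/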
2170 
2171 /* -------------------------------------------------------------------*/
2172 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2173        MatGetRow_MPIAIJ,
2174        MatRestoreRow_MPIAIJ,
2175        MatMult_MPIAIJ,
2176 /* 4*/ MatMultAdd_MPIAIJ,
2177        MatMultTranspose_MPIAIJ,
2178        MatMultTransposeAdd_MPIAIJ,
2179 #ifdef PETSC_HAVE_PBGL
2180        MatSolve_MPIAIJ,
2181 #else
2182        0,
2183 #endif
2184        0,
2185        0,
2186 /*10*/ 0,
2187        0,
2188        0,
2189        MatRelax_MPIAIJ,
2190        MatTranspose_MPIAIJ,
2191 /*15*/ MatGetInfo_MPIAIJ,
2192        MatEqual_MPIAIJ,
2193        MatGetDiagonal_MPIAIJ,
2194        MatDiagonalScale_MPIAIJ,
2195        MatNorm_MPIAIJ,
2196 /*20*/ MatAssemblyBegin_MPIAIJ,
2197        MatAssemblyEnd_MPIAIJ,
2198        0,
2199        MatSetOption_MPIAIJ,
2200        MatZeroEntries_MPIAIJ,
2201 /*25*/ MatZeroRows_MPIAIJ,
2202        0,
2203 #ifdef PETSC_HAVE_PBGL
2204        MatLUFactorNumeric_MPIAIJ,
2205 #else
2206        0,
2207 #endif
2208        0,
2209        0,
2210 /*30*/ MatSetUpPreallocation_MPIAIJ,
2211 #ifdef PETSC_HAVE_PBGL
2212        MatILUFactorSymbolic_MPIAIJ,
2213 #else
2214        0,
2215 #endif
2216        0,
2217        0,
2218        0,
2219 /*35*/ MatDuplicate_MPIAIJ,
2220        0,
2221        0,
2222        0,
2223        0,
2224 /*40*/ MatAXPY_MPIAIJ,
2225        MatGetSubMatrices_MPIAIJ,
2226        MatIncreaseOverlap_MPIAIJ,
2227        MatGetValues_MPIAIJ,
2228        MatCopy_MPIAIJ,
2229 /*45*/ 0,
2230        MatScale_MPIAIJ,
2231        0,
2232        0,
2233        0,
2234 /*50*/ MatSetBlockSize_MPIAIJ,
2235        0,
2236        0,
2237        0,
2238        0,
2239 /*55*/ MatFDColoringCreate_MPIAIJ,
2240        0,
2241        MatSetUnfactored_MPIAIJ,
2242        MatPermute_MPIAIJ,
2243        0,
2244 /*60*/ MatGetSubMatrix_MPIAIJ,
2245        MatDestroy_MPIAIJ,
2246        MatView_MPIAIJ,
2247        0,
2248        0,
2249 /*65*/ 0,
2250        0,
2251        0,
2252        0,
2253        0,
2254 /*70*/ 0,
2255        0,
2256        MatSetColoring_MPIAIJ,
2257 #if defined(PETSC_HAVE_ADIC)
2258        MatSetValuesAdic_MPIAIJ,
2259 #else
2260        0,
2261 #endif
2262        MatSetValuesAdifor_MPIAIJ,
2263 /*75*/ 0,
2264        0,
2265        0,
2266        0,
2267        0,
2268 /*80*/ 0,
2269        0,
2270        0,
2271        0,
2272 /*84*/ MatLoad_MPIAIJ,
2273        0,
2274        0,
2275        0,
2276        0,
2277        0,
2278 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2279        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2280        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2281        MatPtAP_Basic,
2282        MatPtAPSymbolic_MPIAIJ,
2283 /*95*/ MatPtAPNumeric_MPIAIJ,
2284        0,
2285        0,
2286        0,
2287        0,
2288 /*100*/0,
2289        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2290        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2291        MatConjugate_MPIAIJ,
2292        0,
2293 /*105*/MatSetValuesRow_MPIAIJ,
2294        MatRealPart_MPIAIJ,
2295        MatImaginaryPart_MPIAIJ,
2296        0,
2297        0,
2298 /*110*/0,
2299        MatGetRedundantMatrix_MPIAIJ};
2300 
2301 /* ----------------------------------------------------------------------------------------*/
2302 
2303 EXTERN_C_BEGIN
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2306 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2307 {
2308   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2309   PetscErrorCode ierr;
2310 
2311   PetscFunctionBegin;
2312   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2313   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 EXTERN_C_END
2317 
2318 EXTERN_C_BEGIN
2319 #undef __FUNCT__
2320 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2321 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2322 {
2323   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2324   PetscErrorCode ierr;
2325 
2326   PetscFunctionBegin;
2327   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2328   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2329   PetscFunctionReturn(0);
2330 }
2331 EXTERN_C_END
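/*
   Illustrative sketch (not part of this implementation): the usual pairing of
   the two routines above when the same nonzero pattern is refilled many times.
   MatStoreValues() requires that no new nonzero locations be created afterwards.

       ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
       ierr = MatStoreValues(mat);CHKERRQ(ierr);
       ... zero and refill the matrix, assemble, and use it ...
       ierr = MatRetrieveValues(mat);CHKERRQ(ierr);     restore the saved values
*/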
2332 
2333 #include "petscpc.h"
2334 EXTERN_C_BEGIN
2335 #undef __FUNCT__
2336 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2337 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2338 {
2339   Mat_MPIAIJ     *b;
2340   PetscErrorCode ierr;
2341   PetscInt       i;
2342 
2343   PetscFunctionBegin;
2344   B->preallocated = PETSC_TRUE;
2345   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2346   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2347   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2348   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2349 
2350   B->rmap.bs = B->cmap.bs = 1;
2351   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2352   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2353   if (d_nnz) {
2354     for (i=0; i<B->rmap.n; i++) {
2355       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2356     }
2357   }
2358   if (o_nnz) {
2359     for (i=0; i<B->rmap.n; i++) {
2360       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2361     }
2362   }
2363   b = (Mat_MPIAIJ*)B->data;
2364 
2365   /* Explicitly create 2 MATSEQAIJ matrices. */
2366   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2367   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2368   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2369   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2370   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2371   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2372   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2373   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2374 
2375   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2376   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2377 
2378   PetscFunctionReturn(0);
2379 }
2380 EXTERN_C_END
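/*
   Illustrative sketch (not part of this implementation): preallocating a
   parallel AIJ matrix.  d_nz/d_nnz describe the "diagonal" block (columns owned
   by this process) and o_nz/o_nnz the "off-diagonal" block; the sizes and
   counts below are hypothetical.

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/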
2381 
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2384 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2385 {
2386   Mat            mat;
2387   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   *newmat       = 0;
2392   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2393   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2394   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2395   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2396   a    = (Mat_MPIAIJ*)mat->data;
2397 
2398   mat->factor       = matin->factor;
2399   mat->rmap.bs      = matin->rmap.bs;
2400   mat->assembled    = PETSC_TRUE;
2401   mat->insertmode   = NOT_SET_VALUES;
2402   mat->preallocated = PETSC_TRUE;
2403 
2404   a->size           = oldmat->size;
2405   a->rank           = oldmat->rank;
2406   a->donotstash     = oldmat->donotstash;
2407   a->roworiented    = oldmat->roworiented;
2408   a->rowindices     = 0;
2409   a->rowvalues      = 0;
2410   a->getrowactive   = PETSC_FALSE;
2411 
2412   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2413   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2414 
2415   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2416   if (oldmat->colmap) {
2417 #if defined (PETSC_USE_CTABLE)
2418     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2419 #else
2420     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2421     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2422     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2423 #endif
2424   } else a->colmap = 0;
2425   if (oldmat->garray) {
2426     PetscInt len;
2427     len  = oldmat->B->cmap.n;
2428     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2429     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2430     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2431   } else a->garray = 0;
2432 
2433   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2434   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2435   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2436   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2437   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2438   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2439   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2440   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2441   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2442   *newmat = mat;
2443   PetscFunctionReturn(0);
2444 }
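/*
   Illustrative sketch (not part of this implementation): duplicating a parallel
   AIJ matrix either with or without its numerical values.

       Mat B;
       ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);          copy structure and values
       ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);   copy structure only
*/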
2445 
2446 #include "petscsys.h"
2447 
2448 #undef __FUNCT__
2449 #define __FUNCT__ "MatLoad_MPIAIJ"
2450 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2451 {
2452   Mat            A;
2453   PetscScalar    *vals,*svals;
2454   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2455   MPI_Status     status;
2456   PetscErrorCode ierr;
2457   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2458   PetscInt       i,nz,j,rstart,rend,mmax;
2459   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2460   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2461   PetscInt       cend,cstart,n,*rowners;
2462   int            fd;
2463 
2464   PetscFunctionBegin;
2465   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2466   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2467   if (!rank) {
2468     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2469     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2470     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2471   }
2472 
2473   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2474   M = header[1]; N = header[2];
2475   /* determine ownership of all rows */
2476   m    = M/size + ((M % size) > rank);
2477   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2478   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2479 
2480   /* First process needs enough room for process with most rows */
2481   if (!rank) {
2482     mmax       = rowners[1];
2483     for (i=2; i<size; i++) {
2484       mmax = PetscMax(mmax,rowners[i]);
2485     }
2486   } else mmax = m;
2487 
2488   rowners[0] = 0;
2489   for (i=2; i<=size; i++) {
2490     rowners[i] += rowners[i-1];
2491   }
2492   rstart = rowners[rank];
2493   rend   = rowners[rank+1];
2494 
2495   /* distribute row lengths to all processors */
2496   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2497   if (!rank) {
2498     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2499     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2500     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2501     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2502     for (j=0; j<m; j++) {
2503       procsnz[0] += ourlens[j];
2504     }
2505     for (i=1; i<size; i++) {
2506       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2507       /* calculate the number of nonzeros on each processor */
2508       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2509         procsnz[i] += rowlengths[j];
2510       }
2511       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2512     }
2513     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2514   } else {
2515     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2516   }
2517 
2518   if (!rank) {
2519     /* determine max buffer needed and allocate it */
2520     maxnz = 0;
2521     for (i=0; i<size; i++) {
2522       maxnz = PetscMax(maxnz,procsnz[i]);
2523     }
2524     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2525 
2526     /* read in my part of the matrix column indices  */
2527     nz   = procsnz[0];
2528     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2529     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2530 
2531     /* read in everyone else's part and ship it off */
2532     for (i=1; i<size; i++) {
2533       nz   = procsnz[i];
2534       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2535       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2536     }
2537     ierr = PetscFree(cols);CHKERRQ(ierr);
2538   } else {
2539     /* determine buffer space needed for message */
2540     nz = 0;
2541     for (i=0; i<m; i++) {
2542       nz += ourlens[i];
2543     }
2544     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2545 
2546     /* receive message of column indices */
2547     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2548     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2549     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file: unexpected number of column indices");
2550   }
2551 
2552   /* determine column ownership if matrix is not square */
2553   if (N != M) {
2554     n      = N/size + ((N % size) > rank);
2555     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2556     cstart = cend - n;
2557   } else {
2558     cstart = rstart;
2559     cend   = rend;
2560     n      = cend - cstart;
2561   }
2562 
2563   /* loop over local rows, determining number of off diagonal entries */
2564   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2565   jj = 0;
2566   for (i=0; i<m; i++) {
2567     for (j=0; j<ourlens[i]; j++) {
2568       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2569       jj++;
2570     }
2571   }
2572 
2573   /* create our matrix */
2574   for (i=0; i<m; i++) {
2575     ourlens[i] -= offlens[i];
2576   }
2577   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2578   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2579   ierr = MatSetType(A,type);CHKERRQ(ierr);
2580   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2581 
2582   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2583   for (i=0; i<m; i++) {
2584     ourlens[i] += offlens[i];
2585   }
2586 
2587   if (!rank) {
2588     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2589 
2590     /* read in my part of the matrix numerical values  */
2591     nz   = procsnz[0];
2592     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2593 
2594     /* insert into matrix */
2595     jj      = rstart;
2596     smycols = mycols;
2597     svals   = vals;
2598     for (i=0; i<m; i++) {
2599       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2600       smycols += ourlens[i];
2601       svals   += ourlens[i];
2602       jj++;
2603     }
2604 
2605     /* read in other processors and ship out */
2606     for (i=1; i<size; i++) {
2607       nz   = procsnz[i];
2608       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2609       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2610     }
2611     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2612   } else {
2613     /* receive numeric values */
2614     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2615 
2616     /* receive message of values */
2617     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2618     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2619     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file: unexpected number of values");
2620 
2621     /* insert into matrix */
2622     jj      = rstart;
2623     smycols = mycols;
2624     svals   = vals;
2625     for (i=0; i<m; i++) {
2626       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2627       smycols += ourlens[i];
2628       svals   += ourlens[i];
2629       jj++;
2630     }
2631   }
2632   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2633   ierr = PetscFree(vals);CHKERRQ(ierr);
2634   ierr = PetscFree(mycols);CHKERRQ(ierr);
2635   ierr = PetscFree(rowners);CHKERRQ(ierr);
2636 
2637   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2638   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2639   *newmat = A;
2640   PetscFunctionReturn(0);
2641 }
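/*
   Illustrative sketch (not part of this implementation): loading a parallel AIJ
   matrix from a PETSc binary file.  The file name is hypothetical.

       Mat         A;
       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/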
2642 
2643 #undef __FUNCT__
2644 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2645 /*
2646     Not great since it makes two copies of the submatrix: first a sequential AIJ
2647   copy on each process, and then the final result obtained by concatenating the
2648   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2649 */
2650 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2651 {
2652   PetscErrorCode ierr;
2653   PetscMPIInt    rank,size;
2654   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2655   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2656   Mat            *local,M,Mreuse;
2657   PetscScalar    *vwork,*aa;
2658   MPI_Comm       comm = mat->comm;
2659   Mat_SeqAIJ     *aij;
2660 
2661 
2662   PetscFunctionBegin;
2663   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2664   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2665 
2666   if (call ==  MAT_REUSE_MATRIX) {
2667     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2668     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not created by a previous call with MAT_INITIAL_MATRIX; cannot reuse");
2669     local = &Mreuse;
2670     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2671   } else {
2672     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2673     Mreuse = *local;
2674     ierr   = PetscFree(local);CHKERRQ(ierr);
2675   }
2676 
2677   /*
2678       m - number of local rows
2679       n - number of columns (same on all processors)
2680       rstart - first row in new global matrix generated
2681   */
2682   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2683   if (call == MAT_INITIAL_MATRIX) {
2684     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2685     ii  = aij->i;
2686     jj  = aij->j;
2687 
2688     /*
2689         Determine the number of non-zeros in the diagonal and off-diagonal
2690         portions of the matrix in order to do correct preallocation
2691     */
2692 
2693     /* first get start and end of "diagonal" columns */
2694     if (csize == PETSC_DECIDE) {
2695       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2696       if (mglobal == n) { /* square matrix */
2697 	nlocal = m;
2698       } else {
2699         nlocal = n/size + ((n % size) > rank);
2700       }
2701     } else {
2702       nlocal = csize;
2703     }
2704     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2705     rstart = rend - nlocal;
2706     if (rank == size - 1 && rend != n) {
2707       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2708     }
2709 
2710     /* next, compute all the lengths */
2711     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2712     olens = dlens + m;
2713     for (i=0; i<m; i++) {
2714       jend = ii[i+1] - ii[i];
2715       olen = 0;
2716       dlen = 0;
2717       for (j=0; j<jend; j++) {
2718         if (*jj < rstart || *jj >= rend) olen++;
2719         else dlen++;
2720         jj++;
2721       }
2722       olens[i] = olen;
2723       dlens[i] = dlen;
2724     }
2725     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2726     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2727     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2728     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2729     ierr = PetscFree(dlens);CHKERRQ(ierr);
2730   } else {
2731     PetscInt ml,nl;
2732 
2733     M = *newmat;
2734     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2735     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2736     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2737     /*
2738          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2739        rather than the slower MatSetValues().
2740     */
2741     M->was_assembled = PETSC_TRUE;
2742     M->assembled     = PETSC_FALSE;
2743   }
2744   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2745   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2746   ii  = aij->i;
2747   jj  = aij->j;
2748   aa  = aij->a;
2749   for (i=0; i<m; i++) {
2750     row   = rstart + i;
2751     nz    = ii[i+1] - ii[i];
2752     cwork = jj;     jj += nz;
2753     vwork = aa;     aa += nz;
2754     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2755   }
2756 
2757   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2758   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2759   *newmat = M;
2760 
2761   /* save submatrix used in processor for next request */
2762   if (call ==  MAT_INITIAL_MATRIX) {
2763     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2764     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2765   }
2766 
2767   PetscFunctionReturn(0);
2768 }
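/*
   Illustrative sketch (not part of this implementation): extracting a parallel
   submatrix.  The index sets give each process's requested rows and columns in
   global numbering; the names and sizes below are hypothetical.

       IS  isrow,iscol;
       Mat S;
       ierr = ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&isrow);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_WORLD,nlocal,first,1,&iscol);CHKERRQ(ierr);
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
       ... use S; call again with MAT_REUSE_MATRIX after the values of mat change ...
       ierr = MatDestroy(S);CHKERRQ(ierr);
       ierr = ISDestroy(isrow);CHKERRQ(ierr);
       ierr = ISDestroy(iscol);CHKERRQ(ierr);
*/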
2769 
2770 EXTERN_C_BEGIN
2771 #undef __FUNCT__
2772 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2773 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2774 {
2775   PetscInt       m,cstart, cend,j,nnz,i,d;
2776   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2777   const PetscInt *JJ;
2778   PetscScalar    *values;
2779   PetscErrorCode ierr;
2780 
2781   PetscFunctionBegin;
2782   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2783 
2784   B->rmap.bs = B->cmap.bs = 1;
2785   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2786   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2787   m      = B->rmap.n;
2788   cstart = B->cmap.rstart;
2789   cend   = B->cmap.rend;
2790   rstart = B->rmap.rstart;
2791 
2792   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2793   o_nnz = d_nnz + m;
2794 
2795   for (i=0; i<m; i++) {
2796     nnz     = Ii[i+1]- Ii[i];
2797     JJ      = J + Ii[i];
2798     nnz_max = PetscMax(nnz_max,nnz);
2799     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2800     for (j=0; j<nnz; j++) {
2801       if (*JJ >= cstart) break;
2802       JJ++;
2803     }
2804     d = 0;
2805     for (; j<nnz; j++) {
2806       if (*JJ++ >= cend) break;
2807       d++;
2808     }
2809     d_nnz[i] = d;
2810     o_nnz[i] = nnz - d;
2811   }
2812   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2813   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2814 
2815   if (v) values = (PetscScalar*)v;
2816   else {
2817     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2818     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2819   }
2820 
2821   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2822   for (i=0; i<m; i++) {
2823     ii   = i + rstart;
2824     nnz  = Ii[i+1]- Ii[i];
2825     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2826   }
2827   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2828   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2829   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2830 
2831   if (!v) {
2832     ierr = PetscFree(values);CHKERRQ(ierr);
2833   }
2834   PetscFunctionReturn(0);
2835 }
2836 EXTERN_C_END
2837 
2838 #undef __FUNCT__
2839 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2840 /*@
2841    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2842    (the default parallel PETSc format).
2843 
2844    Collective on MPI_Comm
2845 
2846    Input Parameters:
2847 +  B - the matrix
2848 .  i - the indices into j for the start of each local row (starts with zero)
2849 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2850 -  v - optional values in the matrix
2851 
2852    Level: developer
2853 
2854    Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal
2855      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2856 
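     A minimal sketch of typical usage (the local CSR data below is illustrative, not part of this
     routine's requirements; error checking is omitted):

.vb
     Mat         B;
     PetscInt    m = 2;                          /* rows owned by this process            */
     PetscInt    i[] = {0,2,4};                  /* row pointers into j and v             */
     PetscInt    j[] = {0,1,0,1};                /* global column indices, sorted per row */
     PetscScalar v[] = {1.0,2.0,3.0,4.0};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);      /* preallocates, copies the values, and assembles B */
.ve
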
2857 .keywords: matrix, aij, compressed row, sparse, parallel
2858 
2859 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2860           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2861 @*/
2862 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2863 {
2864   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2865 
2866   PetscFunctionBegin;
2867   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2868   if (f) {
2869     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2870   }
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2876 /*@C
2877    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2878    (the default parallel PETSc format).  For good matrix assembly performance
2879    the user should preallocate the matrix storage by setting the parameters
2880    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2881    performance can be increased by more than a factor of 50.
2882 
2883    Collective on MPI_Comm
2884 
2885    Input Parameters:
2886 +  A - the matrix
2887 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2888            (same value is used for all local rows)
2889 .  d_nnz - array containing the number of nonzeros in the various rows of the
2890            DIAGONAL portion of the local submatrix (possibly different for each row)
2891            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2892            The size of this array is equal to the number of local rows, i.e. 'm'.
2893            You must leave room for the diagonal entry even if it is zero.
2894 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2895            submatrix (same value is used for all local rows).
2896 -  o_nnz - array containing the number of nonzeros in the various rows of the
2897            OFF-DIAGONAL portion of the local submatrix (possibly different for
2898            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2899            structure. The size of this array is equal to the number
2900            of local rows, i.e. 'm'.
2901 
2902    If the *_nnz parameter is given then the *_nz parameter is ignored
2903 
2904    The AIJ format (also called the Yale sparse matrix format or
2905    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2906    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2907 
2908    The parallel matrix is partitioned such that the first m0 rows belong to
2909    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2910    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2911 
2912    The DIAGONAL portion of the local submatrix of a processor can be defined
2913    as the submatrix which is obtained by extracting the part corresponding
2914    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2915    first row that belongs to the processor, and r2 is the last row belonging
2916    to this processor. This is a square mxm matrix. The remaining portion
2917    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2918 
2919    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2920 
2921    Example usage:
2922 
2923    Consider the following 8x8 matrix with 34 non-zero values, that is
2924    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
2925    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2926    as follows:
2927 
2928 .vb
2929             1  2  0  |  0  3  0  |  0  4
2930     Proc0   0  5  6  |  7  0  0  |  8  0
2931             9  0 10  | 11  0  0  | 12  0
2932     -------------------------------------
2933            13  0 14  | 15 16 17  |  0  0
2934     Proc1   0 18  0  | 19 20 21  |  0  0
2935             0  0  0  | 22 23  0  | 24  0
2936     -------------------------------------
2937     Proc2  25 26 27  |  0  0 28  | 29  0
2938            30  0  0  | 31 32 33  |  0 34
2939 .ve
2940 
2941    This can be represented as a collection of submatrices as:
2942 
2943 .vb
2944       A B C
2945       D E F
2946       G H I
2947 .ve
2948 
2949    Where the submatrices A,B,C are owned by proc0, D,E,F are
2950    owned by proc1, G,H,I are owned by proc2.
2951 
2952    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2953    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2954    The 'M','N' parameters are 8,8, and have the same values on all procs.
2955 
2956    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2957    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2958    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2959    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2960    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2961    matrix, and [DF] as another SeqAIJ matrix.
2962 
2963    When d_nz, o_nz parameters are specified, d_nz storage elements are
2964    allocated for every row of the local diagonal submatrix, and o_nz
2965    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2966    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2967    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2968    In this case, the values of d_nz,o_nz are:
2969 .vb
2970      proc0 : dnz = 2, o_nz = 2
2971      proc1 : dnz = 3, o_nz = 2
2972      proc2 : dnz = 1, o_nz = 4
2973 .ve
2974    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2975    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2976    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2977    34 values.
2978 
2979    When d_nnz, o_nnz parameters are specified, the storage is specified
2980    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2981    In the above case the values for d_nnz,o_nnz are:
2982 .vb
2983      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2984      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2985      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2986 .ve
2987    Here the space allocated is the sum of all the above values, i.e. 34, and
2988    hence the pre-allocation is perfect.
2989 
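   As a sketch, the corresponding calls on proc1 would look like the following (A is assumed to
   have already been created with MatCreate()/MatSetSizes()/MatSetType(A,MATMPIAIJ); error
   checking omitted):

.vb
     PetscInt d_nnz[] = {3,3,2};
     PetscInt o_nnz[] = {2,1,1};
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);

     /* or, using the single-value form with the per-process maxima */
     MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);
.ve
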
2990    Level: intermediate
2991 
2992 .keywords: matrix, aij, compressed row, sparse, parallel
2993 
2994 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2995           MPIAIJ
2996 @*/
2997 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2998 {
2999   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3000 
3001   PetscFunctionBegin;
3002   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3003   if (f) {
3004     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3005   }
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3011 /*@C
3012      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3013          the local rows in standard CSR format.
3014 
3015    Collective on MPI_Comm
3016 
3017    Input Parameters:
3018 +  comm - MPI communicator
3019 .  m - number of local rows (Cannot be PETSC_DECIDE)
3020 .  n - This value should be the same as the local size used in creating the
3021        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3022        calculated if N is given). For square matrices n is almost always m.
3023 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3024 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3025 .   i - row indices
3026 .   j - column indices
3027 -   a - matrix values
3028 
3029    Output Parameter:
3030 .   mat - the matrix
3031 
3032    Level: intermediate
3033 
3034    Notes:
3035        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3036      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3037      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3038 
3039        The i and j indices are 0 based
3040 
3041 .keywords: matrix, aij, compressed row, sparse, parallel
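   A minimal usage sketch (the local CSR data is illustrative; each process passes the CSR
   description of the rows it owns, and error checking is omitted):

.vb
     Mat         A;
     PetscInt    m = 2;                          /* rows owned by this process            */
     PetscInt    i[] = {0,2,4};                  /* row pointers into j and a             */
     PetscInt    j[] = {0,1,0,1};                /* global column indices, 0 based        */
     PetscScalar a[] = {1.0,2.0,3.0,4.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
     /* i, j, and a have been copied and may be freed or changed afterwards */
.ve
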
3042 
3043 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3044           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3045 @*/
3046 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3047 {
3048   PetscErrorCode ierr;
3049 
3050   PetscFunctionBegin;
3051   if (i[0]) {
3052     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3053   }
3054   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3055   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
       ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3056   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3057   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3058   PetscFunctionReturn(0);
3059 }
3060 
3061 #undef __FUNCT__
3062 #define __FUNCT__ "MatCreateMPIAIJ"
3063 /*@C
3064    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3065    (the default parallel PETSc format).  For good matrix assembly performance
3066    the user should preallocate the matrix storage by setting the parameters
3067    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3068    performance can be increased by more than a factor of 50.
3069 
3070    Collective on MPI_Comm
3071 
3072    Input Parameters:
3073 +  comm - MPI communicator
3074 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3075            This value should be the same as the local size used in creating the
3076            y vector for the matrix-vector product y = Ax.
3077 .  n - This value should be the same as the local size used in creating the
3078        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3079        calculated if N is given). For square matrices n is almost always m.
3080 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3081 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3082 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3083            (same value is used for all local rows)
3084 .  d_nnz - array containing the number of nonzeros in the various rows of the
3085            DIAGONAL portion of the local submatrix (possibly different for each row)
3086            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3087            The size of this array is equal to the number of local rows, i.e. 'm'.
3088            You must leave room for the diagonal entry even if it is zero.
3089 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3090            submatrix (same value is used for all local rows).
3091 -  o_nnz - array containing the number of nonzeros in the various rows of the
3092            OFF-DIAGONAL portion of the local submatrix (possibly different for
3093            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3094            structure. The size of this array is equal to the number
3095            of local rows, i.e. 'm'.
3096 
3097    Output Parameter:
3098 .  A - the matrix
3099 
3100    Notes:
3101    If the *_nnz parameter is given then the *_nz parameter is ignored
3102 
3103    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3104    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3105    storage requirements for this matrix.
3106 
3107    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3108    processor then it must be used on all processors that share the object for
3109    that argument.
3110 
3111    The user MUST specify either the local or global matrix dimensions
3112    (possibly both).
3113 
3114    The parallel matrix is partitioned such that the first m0 rows belong to
3115    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3116    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3117 
3118    The DIAGONAL portion of the local submatrix of a processor can be defined
3119    as the submatrix which is obtained by extracting the part corresponding
3120    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3121    first row that belongs to the processor, and r2 is the last row belonging
3122    to this processor. This is a square mxm matrix. The remaining portion
3123    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3124 
3125    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3126 
3127    When calling this routine with a single process communicator, a matrix of
3128    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3129    type of communicator, use the construction mechanism sketched below:
3130      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3131 
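   For example (a sketch; the sizes and preallocation arguments are whatever the application
   requires):

.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
.ve
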
3132    By default, this format uses inodes (identical nodes) when possible.
3133    We search for consecutive rows with the same nonzero structure, thereby
3134    reusing matrix information to achieve increased efficiency.
3135 
3136    Options Database Keys:
3137 +  -mat_no_inode  - Do not use inodes
3138 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3139 -  -mat_aij_oneindex - Internally use indexing starting at 1
3140         rather than 0.  Note that when calling MatSetValues(),
3141         the user still MUST index entries starting at 0!
3142 
3143 
3144    Example usage:
3145 
3146    Consider the following 8x8 matrix with 34 non-zero values, that is
3147    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3148    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3149    as follows:
3150 
3151 .vb
3152             1  2  0  |  0  3  0  |  0  4
3153     Proc0   0  5  6  |  7  0  0  |  8  0
3154             9  0 10  | 11  0  0  | 12  0
3155     -------------------------------------
3156            13  0 14  | 15 16 17  |  0  0
3157     Proc1   0 18  0  | 19 20 21  |  0  0
3158             0  0  0  | 22 23  0  | 24  0
3159     -------------------------------------
3160     Proc2  25 26 27  |  0  0 28  | 29  0
3161            30  0  0  | 31 32 33  |  0 34
3162 .ve
3163 
3164    This can be represented as a collection of submatrices as:
3165 
3166 .vb
3167       A B C
3168       D E F
3169       G H I
3170 .ve
3171 
3172    Where the submatrices A,B,C are owned by proc0, D,E,F are
3173    owned by proc1, G,H,I are owned by proc2.
3174 
3175    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3176    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3177    The 'M','N' parameters are 8,8, and have the same values on all procs.
3178 
3179    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3180    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3181    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3182    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3183    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3184    matrix, and [DF] as another SeqAIJ matrix.
3185 
3186    When d_nz, o_nz parameters are specified, d_nz storage elements are
3187    allocated for every row of the local diagonal submatrix, and o_nz
3188    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3189    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3190    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3191    In this case, the values of d_nz,o_nz are:
3192 .vb
3193      proc0 : dnz = 2, o_nz = 2
3194      proc1 : dnz = 3, o_nz = 2
3195      proc2 : dnz = 1, o_nz = 4
3196 .ve
3197    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3198    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3199    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3200    34 values.
3201 
3202    When d_nnz, o_nnz parameters are specified, the storage is specified
3203    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3204    In the above case the values for d_nnz,o_nnz are:
3205 .vb
3206      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3207      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3208      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3209 .ve
3210    Here the space allocated is the sum of all the above values, i.e. 34, and
3211    hence the pre-allocation is perfect.
3212 
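   As a sketch, proc1 in the example above could create its part of the matrix with the call
   below (the sizes and arrays are taken from the listings above; error checking omitted):

.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2};
     PetscInt o_nnz[] = {2,1,1};
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* followed by MatSetValues(), MatAssemblyBegin(), and MatAssemblyEnd() */
.ve
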
3213    Level: intermediate
3214 
3215 .keywords: matrix, aij, compressed row, sparse, parallel
3216 
3217 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3218           MPIAIJ, MatCreateMPIAIJWithArrays()
3219 @*/
3220 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3221 {
3222   PetscErrorCode ierr;
3223   PetscMPIInt    size;
3224 
3225   PetscFunctionBegin;
3226   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3227   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3228   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3229   if (size > 1) {
3230     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3231     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3232   } else {
3233     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3234     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3235   }
3236   PetscFunctionReturn(0);
3237 }
3238 
3239 #undef __FUNCT__
3240 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3241 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3242 {
3243   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3244 
3245   PetscFunctionBegin;
3246   *Ad     = a->A;
3247   *Ao     = a->B;
3248   *colmap = a->garray;
3249   PetscFunctionReturn(0);
3250 }
3251 
3252 #undef __FUNCT__
3253 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3254 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3255 {
3256   PetscErrorCode ierr;
3257   PetscInt       i;
3258   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3259 
3260   PetscFunctionBegin;
3261   if (coloring->ctype == IS_COLORING_GLOBAL) {
3262     ISColoringValue *allcolors,*colors;
3263     ISColoring      ocoloring;
3264 
3265     /* set coloring for diagonal portion */
3266     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3267 
3268     /* set coloring for off-diagonal portion */
3269     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3270     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3271     for (i=0; i<a->B->cmap.n; i++) {
3272       colors[i] = allcolors[a->garray[i]];
3273     }
3274     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3275     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3276     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3277     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3278   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3279     ISColoringValue *colors;
3280     PetscInt        *larray;
3281     ISColoring      ocoloring;
3282 
3283     /* set coloring for diagonal portion */
3284     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3285     for (i=0; i<a->A->cmap.n; i++) {
3286       larray[i] = i + A->cmap.rstart;
3287     }
3288     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3289     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3290     for (i=0; i<a->A->cmap.n; i++) {
3291       colors[i] = coloring->colors[larray[i]];
3292     }
3293     ierr = PetscFree(larray);CHKERRQ(ierr);
3294     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3295     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3296     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3297 
3298     /* set coloring for off-diagonal portion */
3299     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3300     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3301     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3302     for (i=0; i<a->B->cmap.n; i++) {
3303       colors[i] = coloring->colors[larray[i]];
3304     }
3305     ierr = PetscFree(larray);CHKERRQ(ierr);
3306     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3307     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3308     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3309   } else {
3310     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3311   }
3312 
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 #if defined(PETSC_HAVE_ADIC)
3317 #undef __FUNCT__
3318 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3319 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3320 {
3321   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3322   PetscErrorCode ierr;
3323 
3324   PetscFunctionBegin;
3325   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3326   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3327   PetscFunctionReturn(0);
3328 }
3329 #endif
3330 
3331 #undef __FUNCT__
3332 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3333 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3334 {
3335   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3336   PetscErrorCode ierr;
3337 
3338   PetscFunctionBegin;
3339   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3340   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3341   PetscFunctionReturn(0);
3342 }
3343 
3344 #undef __FUNCT__
3345 #define __FUNCT__ "MatMerge"
3346 /*@C
3347       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3348                  matrices from each processor
3349 
3350     Collective on MPI_Comm
3351 
3352    Input Parameters:
3353 +    comm - the communicator the parallel matrix will live on
3354 .    inmat - the input sequential matrices
3355 .    n - number of local columns (or PETSC_DECIDE)
3356 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3357 
3358    Output Parameter:
3359 .    outmat - the parallel matrix generated
3360 
3361     Level: advanced
3362 
3363    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3364 
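   A minimal usage sketch (inmat is the sequential matrix each process has already assembled;
   note that, as implemented, this routine destroys inmat; error checking omitted):

.vb
     Mat outmat;
     MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve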
3365 @*/
3366 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3367 {
3368   PetscErrorCode ierr;
3369   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3370   PetscInt       *indx;
3371   PetscScalar    *values;
3372 
3373   PetscFunctionBegin;
3374   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3375   if (scall == MAT_INITIAL_MATRIX){
3376     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3377     if (n == PETSC_DECIDE){
3378       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3379     }
3380     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3381     rstart -= m;
3382 
3383     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3384     for (i=0;i<m;i++) {
3385       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3386       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3387       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3388     }
3389     /* This routine will ONLY return MPIAIJ type matrix */
3390     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3391     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3392     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3393     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3394     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3395 
3396   } else if (scall == MAT_REUSE_MATRIX){
3397     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3398   } else {
3399     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3400   }
3401 
3402   for (i=0;i<m;i++) {
3403     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3404     Ii    = i + rstart;
3405     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3406     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3407   }
3408   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3409   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3410   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3411 
3412   PetscFunctionReturn(0);
3413 }
3414 
3415 #undef __FUNCT__
3416 #define __FUNCT__ "MatFileSplit"
3417 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3418 {
3419   PetscErrorCode    ierr;
3420   PetscMPIInt       rank;
3421   PetscInt          m,N,i,rstart,nnz;
3422   size_t            len;
3423   const PetscInt    *indx;
3424   PetscViewer       out;
3425   char              *name;
3426   Mat               B;
3427   const PetscScalar *values;
3428 
3429   PetscFunctionBegin;
3430   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3431   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3432   /* Should this be the type of the diagonal block of A? */
3433   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3434   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3435   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3436   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3437   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3438   for (i=0;i<m;i++) {
3439     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3440     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3441     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3442   }
3443   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3444   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3445 
3446   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3447   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3448   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3449   sprintf(name,"%s.%d",outfile,rank);
3450   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3451   ierr = PetscFree(name);CHKERRQ(ierr);
3452   ierr = MatView(B,out);CHKERRQ(ierr);
3453   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3454   ierr = MatDestroy(B);CHKERRQ(ierr);
3455   PetscFunctionReturn(0);
3456 }
3457 
3458 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3459 #undef __FUNCT__
3460 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3461 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3462 {
3463   PetscErrorCode       ierr;
3464   Mat_Merge_SeqsToMPI  *merge;
3465   PetscContainer       container;
3466 
3467   PetscFunctionBegin;
3468   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3469   if (container) {
3470     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3471     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3472     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3473     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3474     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3475     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3476     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3477     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3478     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3479     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3480     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3481     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3482 
3483     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3484     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3485   }
3486   ierr = PetscFree(merge);CHKERRQ(ierr);
3487 
3488   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3489   PetscFunctionReturn(0);
3490 }
3491 
3492 #include "src/mat/utils/freespace.h"
3493 #include "petscbt.h"
3494 static PetscEvent logkey_seqstompinum = 0;
3495 #undef __FUNCT__
3496 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3497 /*@C
3498       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3499                  matrices from each processor
3500 
3501     Collective on MPI_Comm
3502 
3503    Input Parameters:
3504 +    comm - the communicator the parallel matrix will live on
3505 .    seqmat - the input sequential matrices
3506 .    m - number of local rows (or PETSC_DECIDE)
3507 .    n - number of local columns (or PETSC_DECIDE)
3508 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3509 
3510    Output Parameter:
3511 .    mpimat - the parallel matrix generated
3512 
3513     Level: advanced
3514 
3515    Notes:
3516      The dimensions of the sequential matrix in each processor MUST be the same.
3517      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3518      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
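
     A sketch of the intended calling sequence (normally this is done through MatMerge_SeqsToMPI(),
     which performs the symbolic phase for MAT_INITIAL_MATRIX and then calls this numeric phase):

.vb
     Mat mpimat;
     MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
     MatMerge_SeqsToMPINumeric(seqmat,mpimat);
.ve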
3519 @*/
3520 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3521 {
3522   PetscErrorCode       ierr;
3523   MPI_Comm             comm=mpimat->comm;
3524   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3525   PetscMPIInt          size,rank,taga,*len_s;
3526   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3527   PetscInt             proc,m;
3528   PetscInt             **buf_ri,**buf_rj;
3529   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3530   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3531   MPI_Request          *s_waits,*r_waits;
3532   MPI_Status           *status;
3533   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3534   Mat_Merge_SeqsToMPI  *merge;
3535   PetscContainer       container;
3536 
3537   PetscFunctionBegin;
3538   if (!logkey_seqstompinum) {
3539     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3540   }
3541   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3542 
3543   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3544   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3545 
3546   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3547   if (container) {
3548     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3549   }
3550   bi     = merge->bi;
3551   bj     = merge->bj;
3552   buf_ri = merge->buf_ri;
3553   buf_rj = merge->buf_rj;
3554 
3555   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3556   owners = merge->rowmap.range;
3557   len_s  = merge->len_s;
3558 
3559   /* send and recv matrix values */
3560   /*-----------------------------*/
3561   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3562   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3563 
3564   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3565   for (proc=0,k=0; proc<size; proc++){
3566     if (!len_s[proc]) continue;
3567     i = owners[proc];
3568     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3569     k++;
3570   }
3571 
3572   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3573   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3574   ierr = PetscFree(status);CHKERRQ(ierr);
3575 
3576   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3577   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3578 
3579   /* insert mat values of mpimat */
3580   /*----------------------------*/
3581   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3582   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3583   nextrow = buf_ri_k + merge->nrecv;
3584   nextai  = nextrow + merge->nrecv;
3585 
3586   for (k=0; k<merge->nrecv; k++){
3587     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3588     nrows = *(buf_ri_k[k]);
3589     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3590     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure  */
3591   }
3592 
3593   /* set values of ba */
3594   m = merge->rowmap.n;
3595   for (i=0; i<m; i++) {
3596     arow = owners[rank] + i;
3597     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3598     bnzi = bi[i+1] - bi[i];
3599     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3600 
3601     /* add local non-zero vals of this proc's seqmat into ba */
3602     anzi = ai[arow+1] - ai[arow];
3603     aj   = a->j + ai[arow];
3604     aa   = a->a + ai[arow];
3605     nextaj = 0;
3606     for (j=0; nextaj<anzi; j++){
3607       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3608         ba_i[j] += aa[nextaj++];
3609       }
3610     }
3611 
3612     /* add received vals into ba */
3613     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3614       /* i-th row */
3615       if (i == *nextrow[k]) {
3616         anzi = *(nextai[k]+1) - *nextai[k];
3617         aj   = buf_rj[k] + *(nextai[k]);
3618         aa   = abuf_r[k] + *(nextai[k]);
3619         nextaj = 0;
3620         for (j=0; nextaj<anzi; j++){
3621           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3622             ba_i[j] += aa[nextaj++];
3623           }
3624         }
3625         nextrow[k]++; nextai[k]++;
3626       }
3627     }
3628     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3629   }
3630   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632 
3633   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3634   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3635   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3636   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3637   PetscFunctionReturn(0);
3638 }
3639 
3640 static PetscEvent logkey_seqstompisym = 0;
3641 #undef __FUNCT__
3642 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3643 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3644 {
3645   PetscErrorCode       ierr;
3646   Mat                  B_mpi;
3647   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3648   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3649   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3650   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3651   PetscInt             len,proc,*dnz,*onz;
3652   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3653   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3654   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3655   MPI_Status           *status;
3656   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3657   PetscBT              lnkbt;
3658   Mat_Merge_SeqsToMPI  *merge;
3659   PetscContainer       container;
3660 
3661   PetscFunctionBegin;
3662   if (!logkey_seqstompisym) {
3663     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3664   }
3665   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3666 
3667   /* make sure it is a PETSc comm */
3668   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3669   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3670   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3671 
3672   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3673   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3674 
3675   /* determine row ownership */
3676   /*---------------------------------------------------------*/
3677   merge->rowmap.n = m;
3678   merge->rowmap.N = M;
3679   merge->rowmap.bs = 1;
3680   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3681   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3682   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3683 
3684   m      = merge->rowmap.n;
3685   M      = merge->rowmap.N;
3686   owners = merge->rowmap.range;
3687 
3688   /* determine the number of messages to send, their lengths */
3689   /*---------------------------------------------------------*/
3690   len_s  = merge->len_s;
3691 
3692   len = 0;  /* length of buf_si[] */
3693   merge->nsend = 0;
3694   for (proc=0; proc<size; proc++){
3695     len_si[proc] = 0;
3696     if (proc == rank){
3697       len_s[proc] = 0;
3698     } else {
3699       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3700       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3701     }
3702     if (len_s[proc]) {
3703       merge->nsend++;
3704       nrows = 0;
3705       for (i=owners[proc]; i<owners[proc+1]; i++){
3706         if (ai[i+1] > ai[i]) nrows++;
3707       }
3708       len_si[proc] = 2*(nrows+1);
3709       len += len_si[proc];
3710     }
3711   }
3712 
3713   /* determine the number and length of messages to receive for ij-structure */
3714   /*-------------------------------------------------------------------------*/
3715   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3716   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3717 
3718   /* post the Irecv of j-structure */
3719   /*-------------------------------*/
3720   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3721   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3722 
3723   /* post the Isend of j-structure */
3724   /*--------------------------------*/
3725   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3726   sj_waits = si_waits + merge->nsend;
3727 
3728   for (proc=0, k=0; proc<size; proc++){
3729     if (!len_s[proc]) continue;
3730     i = owners[proc];
3731     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3732     k++;
3733   }
3734 
3735   /* receives and sends of j-structure are complete */
3736   /*------------------------------------------------*/
3737   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3738   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3739 
3740   /* send and recv i-structure */
3741   /*---------------------------*/
3742   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3743   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3744 
3745   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3746   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3747   for (proc=0,k=0; proc<size; proc++){
3748     if (!len_s[proc]) continue;
3749     /* form outgoing message for i-structure:
3750          buf_si[0]:                 nrows to be sent
3751                [1:nrows]:           row index (global)
3752                [nrows+1:2*nrows+1]: i-structure index
3753     */
3754     /*-------------------------------------------*/
3755     nrows = len_si[proc]/2 - 1;
3756     buf_si_i    = buf_si + nrows+1;
3757     buf_si[0]   = nrows;
3758     buf_si_i[0] = 0;
3759     nrows = 0;
3760     for (i=owners[proc]; i<owners[proc+1]; i++){
3761       anzi = ai[i+1] - ai[i];
3762       if (anzi) {
3763         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3764         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3765         nrows++;
3766       }
3767     }
3768     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3769     k++;
3770     buf_si += len_si[proc];
3771   }
3772 
3773   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3774   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3775 
3776   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3777   for (i=0; i<merge->nrecv; i++){
3778     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3779   }
3780 
3781   ierr = PetscFree(len_si);CHKERRQ(ierr);
3782   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3783   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3784   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3785   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3786   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3787   ierr = PetscFree(status);CHKERRQ(ierr);
3788 
3789   /* compute a local seq matrix in each processor */
3790   /*----------------------------------------------*/
3791   /* allocate bi array and free space for accumulating nonzero column info */
3792   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3793   bi[0] = 0;
3794 
3795   /* create and initialize a linked list */
3796   nlnk = N+1;
3797   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3798 
3799   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3800   len = 0;
3801   len  = ai[owners[rank+1]] - ai[owners[rank]];
3802   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3803   current_space = free_space;
3804 
3805   /* determine symbolic info for each local row */
3806   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3807   nextrow = buf_ri_k + merge->nrecv;
3808   nextai  = nextrow + merge->nrecv;
3809   for (k=0; k<merge->nrecv; k++){
3810     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3811     nrows = *buf_ri_k[k];
3812     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3813     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure  */
3814   }
3815 
3816   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3817   len = 0;
3818   for (i=0;i<m;i++) {
3819     bnzi   = 0;
3820     /* add local non-zero cols of this proc's seqmat into lnk */
3821     arow   = owners[rank] + i;
3822     anzi   = ai[arow+1] - ai[arow];
3823     aj     = a->j + ai[arow];
3824     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3825     bnzi += nlnk;
3826     /* add received col data into lnk */
3827     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3828       if (i == *nextrow[k]) { /* i-th row */
3829         anzi = *(nextai[k]+1) - *nextai[k];
3830         aj   = buf_rj[k] + *nextai[k];
3831         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3832         bnzi += nlnk;
3833         nextrow[k]++; nextai[k]++;
3834       }
3835     }
3836     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3837 
3838     /* if free space is not available, make more free space */
3839     if (current_space->local_remaining<bnzi) {
3840       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3841       nspacedouble++;
3842     }
3843     /* copy data into free space, then initialize lnk */
3844     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3845     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3846 
3847     current_space->array           += bnzi;
3848     current_space->local_used      += bnzi;
3849     current_space->local_remaining -= bnzi;
3850 
3851     bi[i+1] = bi[i] + bnzi;
3852   }
3853 
3854   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3855 
3856   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3857   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3858   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3859 
3860   /* create symbolic parallel matrix B_mpi */
3861   /*---------------------------------------*/
3862   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3863   if (n==PETSC_DECIDE) {
3864     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3865   } else {
3866     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3867   }
3868   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3869   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3870   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3871 
3872   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3873   B_mpi->assembled     = PETSC_FALSE;
3874   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3875   merge->bi            = bi;
3876   merge->bj            = bj;
3877   merge->buf_ri        = buf_ri;
3878   merge->buf_rj        = buf_rj;
3879   merge->coi           = PETSC_NULL;
3880   merge->coj           = PETSC_NULL;
3881   merge->owners_co     = PETSC_NULL;
3882 
3883   /* attach the supporting struct to B_mpi for reuse */
3884   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3885   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3886   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3887   *mpimat = B_mpi;
3888 
3889   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3890   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3891   PetscFunctionReturn(0);
3892 }
3893 
3894 static PetscEvent logkey_seqstompi = 0;
3895 #undef __FUNCT__
3896 #define __FUNCT__ "MatMerge_SeqsToMPI"
3897 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3898 {
3899   PetscErrorCode   ierr;
3900 
3901   PetscFunctionBegin;
3902   if (!logkey_seqstompi) {
3903     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3904   }
3905   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3906   if (scall == MAT_INITIAL_MATRIX){
3907     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3908   }
3909   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3910   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3911   PetscFunctionReturn(0);
3912 }
3913 static PetscEvent logkey_getlocalmat = 0;
3914 #undef __FUNCT__
3915 #define __FUNCT__ "MatGetLocalMat"
3916 /*@C
3917      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
3918 
3919     Not Collective
3920 
3921    Input Parameters:
3922 +    A - the matrix
3923 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3924 
3925    Output Parameter:
3926 .    A_loc - the local sequential matrix generated
3927 
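    A minimal usage sketch (error checking omitted):

.vb
     Mat A_loc;
     MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);   /* refresh the values after A has changed */
     MatDestroy(A_loc);
.ve
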
3928     Level: developer
3929 
3930 @*/
3931 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3932 {
3933   PetscErrorCode  ierr;
3934   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3935   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3936   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3937   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3938   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3939   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3940 
3941   PetscFunctionBegin;
3942   if (!logkey_getlocalmat) {
3943     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3944   }
3945   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3946   if (scall == MAT_INITIAL_MATRIX){
3947     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3948     ci[0] = 0;
3949     for (i=0; i<am; i++){
3950       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3951     }
3952     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3953     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3954     k = 0;
3955     for (i=0; i<am; i++) {
3956       ncols_o = bi[i+1] - bi[i];
3957       ncols_d = ai[i+1] - ai[i];
3958       /* off-diagonal portion of A */
3959       for (jo=0; jo<ncols_o; jo++) {
3960         col = cmap[*bj];
3961         if (col >= cstart) break;
3962         cj[k]   = col; bj++;
3963         ca[k++] = *ba++;
3964       }
3965       /* diagonal portion of A */
3966       for (j=0; j<ncols_d; j++) {
3967         cj[k]   = cstart + *aj++;
3968         ca[k++] = *aa++;
3969       }
3970       /* off-diagonal portion of A */
3971       for (j=jo; j<ncols_o; j++) {
3972         cj[k]   = cmap[*bj++];
3973         ca[k++] = *ba++;
3974       }
3975     }
3976     /* put together the new matrix */
3977     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3978     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3979     /* Since these are PETSc arrays, change flags to free them as necessary. */
3980     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3981     mat->free_a  = PETSC_TRUE;
3982     mat->free_ij = PETSC_TRUE;
3983     mat->nonew   = 0;
3984   } else if (scall == MAT_REUSE_MATRIX){
3985     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3986     ci = mat->i; cj = mat->j; ca = mat->a;
3987     for (i=0; i<am; i++) {
3988       /* off-diagonal portion of A */
3989       ncols_o = bi[i+1] - bi[i];
3990       for (jo=0; jo<ncols_o; jo++) {
3991         col = cmap[*bj];
3992         if (col >= cstart) break;
3993         *ca++ = *ba++; bj++;
3994       }
3995       /* diagonal portion of A */
3996       ncols_d = ai[i+1] - ai[i];
3997       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3998       /* off-diagonal portion of A */
3999       for (j=jo; j<ncols_o; j++) {
4000         *ca++ = *ba++; bj++;
4001       }
4002     }
4003   } else {
4004     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4005   }
4006 
4007   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4008   PetscFunctionReturn(0);
4009 }
4010 
4011 static PetscEvent logkey_getlocalmatcondensed = 0;
4012 #undef __FUNCT__
4013 #define __FUNCT__ "MatGetLocalMatCondensed"
4014 /*@C
4015      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows of A and its NON-ZERO columns
4016 
4017     Not Collective
4018 
4019    Input Parameters:
4020 +    A - the matrix
4021 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4022 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4023 
4024    Output Parameter:
4025 .    A_loc - the local sequential matrix generated
4026 
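    A minimal usage sketch (passing PETSC_NULL for row and col selects all local rows and all
    columns that contain nonzeros; error checking omitted):

.vb
     Mat A_loc;
     MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
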
4027     Level: developer
4028 
4029 @*/
4030 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4031 {
4032   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4033   PetscErrorCode    ierr;
4034   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4035   IS                isrowa,iscola;
4036   Mat               *aloc;
4037 
4038   PetscFunctionBegin;
4039   if (!logkey_getlocalmatcondensed) {
4040     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
4041   }
4042   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4043   if (!row){
4044     start = A->rmap.rstart; end = A->rmap.rend;
4045     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4046   } else {
4047     isrowa = *row;
4048   }
4049   if (!col){
4050     start = A->cmap.rstart;
4051     cmap  = a->garray;
4052     nzA   = a->A->cmap.n;
4053     nzB   = a->B->cmap.n;
4054     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4055     ncols = 0;
4056     for (i=0; i<nzB; i++) {
4057       if (cmap[i] < start) idx[ncols++] = cmap[i];
4058       else break;
4059     }
4060     imark = i;
4061     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4062     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4063     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4064     ierr = PetscFree(idx);CHKERRQ(ierr);
4065   } else {
4066     iscola = *col;
4067   }
4068   if (scall != MAT_INITIAL_MATRIX){
4069     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4070     aloc[0] = *A_loc;
4071   }
4072   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4073   *A_loc = aloc[0];
4074   ierr = PetscFree(aloc);CHKERRQ(ierr);
4075   if (!row){
4076     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4077   }
4078   if (!col){
4079     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4080   }
4081   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4082   PetscFunctionReturn(0);
4083 }
4084 
4085 static PetscEvent logkey_GetBrowsOfAcols = 0;
4086 #undef __FUNCT__
4087 #define __FUNCT__ "MatGetBrowsOfAcols"
4088 /*@C
4089     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4090 
4091     Collective on Mat
4092 
4093    Input Parameters:
4094 +    A,B - the matrices in mpiaij format
4095 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4096 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4097 
4098    Output Parameters:
4099 +    rowb, colb - index sets of rows and columns of B to extract
4100 .    brstart - row index of B_seq at which B's own local rows begin (the following B->rmap.n rows of B_seq are taken from B's local rows)
4101 -    B_seq - the sequential matrix generated
4102 
4103     Level: developer
4104 
4105 @*/
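/*
   Example usage (a sketch only; A and B are assumed to be assembled MPIAIJ matrices whose
   layouts are compatible, i.e. A's column ownership ranges match B's row ownership ranges):

      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ...  B's numerical values change but its nonzero pattern does not  ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/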
4106 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4107 {
4108   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4109   PetscErrorCode    ierr;
4110   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4111   IS                isrowb,iscolb;
4112   Mat               *bseq;
4113 
4114   PetscFunctionBegin;
4115   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4116     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4117   }
4118   if (!logkey_GetBrowsOfAcols) {
4119     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
4120   }
4121   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4122 
4123   if (scall == MAT_INITIAL_MATRIX){
4124     start = A->cmap.rstart;
4125     cmap  = a->garray;
4126     nzA   = a->A->cmap.n;
4127     nzB   = a->B->cmap.n;
4128     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4129     ncols = 0;
4130     for (i=0; i<nzB; i++) {  /* row < local row index */
4131       if (cmap[i] < start) idx[ncols++] = cmap[i];
4132       else break;
4133     }
4134     imark = i;
4135     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4136     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4137     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4138     ierr = PetscFree(idx);CHKERRQ(ierr);
4139     *brstart = imark;
4140     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4141   } else {
4142     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4143     isrowb = *rowb; iscolb = *colb;
4144     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4145     bseq[0] = *B_seq;
4146   }
4147   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4148   *B_seq = bseq[0];
4149   ierr = PetscFree(bseq);CHKERRQ(ierr);
4150   if (!rowb){
4151     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4152   } else {
4153     *rowb = isrowb;
4154   }
4155   if (!colb){
4156     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4157   } else {
4158     *colb = iscolb;
4159   }
4160   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 static PetscEvent logkey_GetBrowsOfAocols = 0;
4165 #undef __FUNCT__
4166 #define __FUNCT__ "MatGetBrowsOfAoCols"
4167 /*@C
4168     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4169     of the OFF-DIAGONAL portion of local A
4170 
4171     Collective on Mat
4172 
4173    Input Parameters:
4174 +    A,B - the matrices in mpiaij format
4175 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4176 .    startsj - starting points of the messages in B's sending and receiving j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4177 -    bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4178 
4179    Output Parameter:
4180 .    B_oth - the sequential matrix generated
4181 
4182     Level: developer
4183 
4184 @*/
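/*
   Example usage (a sketch only, under the same layout assumptions as MatGetBrowsOfAcols();
   passing the addresses of startsj and bufa lets the second call reuse the communication buffers):

      PetscInt    *startsj;
      PetscScalar *bufa;
      Mat         B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ...  B's numerical values change but its nonzero pattern does not  ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/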
4185 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4186 {
4187   VecScatter_MPI_General *gen_to,*gen_from;
4188   PetscErrorCode         ierr;
4189   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4190   Mat_SeqAIJ             *b_oth;
4191   VecScatter             ctx=a->Mvctx;
4192   MPI_Comm               comm=ctx->comm;
4193   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4194   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4195   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4196   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4197   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4198   MPI_Status             *sstatus,rstatus;
4199   PetscInt               *cols,sbs,rbs;
4200   PetscScalar            *vals;
4201 
4202   PetscFunctionBegin;
4203   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4204     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4205   }
4206   if (!logkey_GetBrowsOfAocols) {
4207     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
4208   }
4209   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4210   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4211 
4212   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4213   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4214   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
4215   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
4216   nrecvs   = gen_from->n;
4217   nsends   = gen_to->n;
4218 
4219   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4220   srow     = gen_to->indices;   /* local row index to be sent */
4221   sstarts  = gen_to->starts;
4222   sprocs   = gen_to->procs;
4223   sstatus  = gen_to->sstatus;
4224   sbs      = gen_to->bs;
4225   rstarts  = gen_from->starts;
4226   rprocs   = gen_from->procs;
4227   rbs      = gen_from->bs;
4228 
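  /* The off-process rows of B needed by this process are gathered in three passes:
     (1) the row lengths (i-array), (2) the column indices (j-array), and (3) the
     numerical values (a-array). For MAT_REUSE_MATRIX only pass (3) is repeated,
     using the buffers saved in startsj and bufa_ptr. */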
4229   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4230   if (scall == MAT_INITIAL_MATRIX){
4231     /* i-array */
4232     /*---------*/
4233     /*  post receives */
4234     for (i=0; i<nrecvs; i++){
4235       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4236       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4237       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4238     }
4239 
4240     /* pack the outgoing message */
4241     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4242     rstartsj = sstartsj + nsends +1;
4243     sstartsj[0] = 0;  rstartsj[0] = 0;
4244     len = 0; /* total length of j or a array to be sent */
4245     k = 0;
4246     for (i=0; i<nsends; i++){
4247       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4248       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4249       for (j=0; j<nrows; j++) {
4250         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4251         for (l=0; l<sbs; l++){
4252           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4253           rowlen[j*sbs+l] = ncols;
4254           len += ncols;
4255           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4256         }
4257         k++;
4258       }
4259       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4260       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4261     }
4262     /* recvs and sends of i-array are completed */
4263     i = nrecvs;
4264     while (i--) {
4265       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4266     }
4267     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4268 
4269     /* allocate buffers for sending j and a arrays */
4270     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4271     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4272 
4273     /* create i-array of B_oth */
4274     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4275     b_othi[0] = 0;
4276     len = 0; /* total length of j or a array to be received */
4277     k = 0;
4278     for (i=0; i<nrecvs; i++){
4279       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4280       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4281       for (j=0; j<nrows; j++) {
4282         b_othi[k+1] = b_othi[k] + rowlen[j];
4283         len += rowlen[j]; k++;
4284       }
4285       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4286     }
4287 
4288     /* allocate space for j and a arrays of B_oth */
4289     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4290     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4291 
4292     /* j-array */
4293     /*---------*/
4294     /*  post receives of j-array */
4295     for (i=0; i<nrecvs; i++){
4296       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4297       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4298     }
4299 
4300     /* pack the outgoing message j-array */
4301     k = 0;
4302     for (i=0; i<nsends; i++){
4303       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4304       bufJ = bufj+sstartsj[i];
4305       for (j=0; j<nrows; j++) {
4306         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4307         for (ll=0; ll<sbs; ll++){
4308           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4309           for (l=0; l<ncols; l++){
4310             *bufJ++ = cols[l];
4311           }
4312           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4313         }
4314       }
4315       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4316     }
4317 
4318     /* recvs and sends of j-array are completed */
4319     i = nrecvs;
4320     while (i--) {
4321       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4322     }
4323     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4324   } else if (scall == MAT_REUSE_MATRIX){
4325     sstartsj = *startsj;
4326     rstartsj = sstartsj + nsends +1;
4327     bufa     = *bufa_ptr;
4328     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4329     b_otha   = b_oth->a;
4330   } else {
4331     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4332   }
4333 
4334   /* a-array */
4335   /*---------*/
4336   /*  post receives of a-array */
4337   for (i=0; i<nrecvs; i++){
4338     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4339     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4340   }
4341 
4342   /* pack the outgoing message a-array */
4343   k = 0;
4344   for (i=0; i<nsends; i++){
4345     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4346     bufA = bufa+sstartsj[i];
4347     for (j=0; j<nrows; j++) {
4348       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4349       for (ll=0; ll<sbs; ll++){
4350         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4351         for (l=0; l<ncols; l++){
4352           *bufA++ = vals[l];
4353         }
4354         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4355       }
4356     }
4357     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4358   }
4359   /* recvs and sends of a-array are completed */
4360   i = nrecvs;
4361   while (i--) {
4362     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4363   }
4364   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4365   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4366 
4367   if (scall == MAT_INITIAL_MATRIX){
4368     /* put together the new matrix */
4369     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4370 
4371     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4372     /* Since these are PETSc arrays, change flags to free them as necessary. */
4373     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4374     b_oth->free_a  = PETSC_TRUE;
4375     b_oth->free_ij = PETSC_TRUE;
4376     b_oth->nonew   = 0;
4377 
4378     ierr = PetscFree(bufj);CHKERRQ(ierr);
4379     if (!startsj || !bufa_ptr){
4380       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4381       ierr = PetscFree(bufa);CHKERRQ(ierr);
4382     } else {
4383       *startsj  = sstartsj;
4384       *bufa_ptr = bufa;
4385     }
4386   }
4387   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4388   PetscFunctionReturn(0);
4389 }
4390 
4391 #undef __FUNCT__
4392 #define __FUNCT__ "MatGetCommunicationStructs"
4393 /*@C
4394   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4395 
4396   Not Collective
4397 
4398   Input Parameters:
4399 . A - The matrix in mpiaij format
4400 
4401   Output Parameters:
4402 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4403 . colmap - A map from global column index to local index into lvec
4404 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4405 
4406   Level: developer
4407 
4408 @*/
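/*
   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix, and the
   returned objects belong to the matrix and must not be destroyed by the caller):

      Vec        lvec;
      VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
#else
      PetscInt   *colmap;
#endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/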
4409 #if defined (PETSC_USE_CTABLE)
4410 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4411 #else
4412 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4413 #endif
4414 {
4415   Mat_MPIAIJ *a;
4416 
4417   PetscFunctionBegin;
4418   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4419   PetscValidPointer(lvec, 2);
4420   PetscValidPointer(colmap, 3);
4421   PetscValidPointer(multScatter, 4);
4422   a = (Mat_MPIAIJ *) A->data;
4423   if (lvec) *lvec = a->lvec;
4424   if (colmap) *colmap = a->colmap;
4425   if (multScatter) *multScatter = a->Mvctx;
4426   PetscFunctionReturn(0);
4427 }
4428 
4429 EXTERN_C_BEGIN
4430 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4431 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4432 EXTERN_C_END
4433 
4434 /*MC
4435    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4436 
4437    Options Database Keys:
4438 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4439 
4440   Level: beginner
4441 
4442 .seealso: MatCreateMPIAIJ
4443 M*/
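/*
   Typical construction of an MPIAIJ matrix (a sketch only; M, N and the preallocation
   values 5 and 2 are placeholders chosen for illustration):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ...  MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd()  ...
*/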
4444 
4445 EXTERN_C_BEGIN
4446 #undef __FUNCT__
4447 #define __FUNCT__ "MatCreate_MPIAIJ"
4448 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4449 {
4450   Mat_MPIAIJ     *b;
4451   PetscErrorCode ierr;
4452   PetscMPIInt    size;
4453 
4454   PetscFunctionBegin;
4455   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4456 
4457   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
4458   B->data         = (void*)b;
4459   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4460   B->factor       = 0;
4461   B->rmap.bs      = 1;
4462   B->assembled    = PETSC_FALSE;
4463   B->mapping      = 0;
4464 
4465   B->insertmode      = NOT_SET_VALUES;
4466   b->size            = size;
4467   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4468 
4469   /* build cache for off array entries formed */
4470   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4471   b->donotstash  = PETSC_FALSE;
4472   b->colmap      = 0;
4473   b->garray      = 0;
4474   b->roworiented = PETSC_TRUE;
4475 
4476   /* stuff used for matrix vector multiply */
4477   b->lvec      = PETSC_NULL;
4478   b->Mvctx     = PETSC_NULL;
4479 
4480   /* stuff for MatGetRow() */
4481   b->rowindices   = 0;
4482   b->rowvalues    = 0;
4483   b->getrowactive = PETSC_FALSE;
4484 
4485 
4486   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4487                                      "MatStoreValues_MPIAIJ",
4488                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4489   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4490                                      "MatRetrieveValues_MPIAIJ",
4491                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4492   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4493 				     "MatGetDiagonalBlock_MPIAIJ",
4494                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4495   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4496 				     "MatIsTranspose_MPIAIJ",
4497 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4499 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4500 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4502 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4503 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4504   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4505 				     "MatDiagonalScaleLocal_MPIAIJ",
4506 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4508                                      "MatConvert_MPIAIJ_MPICSRPERM",
4509                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4511                                      "MatConvert_MPIAIJ_MPICRL",
4512                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4513   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4514   PetscFunctionReturn(0);
4515 }
4516 EXTERN_C_END
4517 
4518 #undef __FUNCT__
4519 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4520 /*@C
4521      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
4522          and "off-diagonal" parts of the matrix in CSR format.
4523 
4524    Collective on MPI_Comm
4525 
4526    Input Parameters:
4527 +  comm - MPI communicator
4528 .  m - number of local rows (Cannot be PETSC_DECIDE)
4529 .  n - This value should be the same as the local size used in creating the
4530        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4531        it calculated if N is given). For square matrices n is almost always m.
4532 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4533 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4534 .   i - row indices for "diagonal" portion of matrix
4535 .   j - column indices for "diagonal" portion of matrix
4536 .   a - matrix values for "diagonal" portion of matrix
4537 .   oi - row indices for "off-diagonal" portion of matrix
4538 .   oj - column indices for "off-diagonal" portion of matrix
4539 -   oa - matrix values for "off-diagonal" portion of matrix
4540 
4541    Output Parameter:
4542 .   mat - the matrix
4543 
4544    Level: advanced
4545 
4546    Notes:
4547        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc; they must remain valid as long as the matrix is used.
4548 
4549        The i and j indices are 0 based
4550 
4551        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4552 
4553 
4554 .keywords: matrix, aij, compressed row, sparse, parallel
4555 
4556 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4557           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4558 @*/
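/*
   Example usage (a sketch only; m, n and the six CSR arrays are assumed to have been built
   by the caller as described above, and they must stay valid while the matrix is used):

      Mat mat;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
      ...  use mat  ...
      ierr = MatDestroy(mat);CHKERRQ(ierr);
*/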
4559 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4560 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4561 {
4562   PetscErrorCode ierr;
4563   Mat_MPIAIJ     *maij;
4564 
4565   PetscFunctionBegin;
4566   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4567   if (i[0]) {
4568     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4569   }
4570   if (oi[0]) {
4571     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4572   }
4573   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4574   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4575   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4576   maij = (Mat_MPIAIJ*) (*mat)->data;
4577   maij->donotstash     = PETSC_TRUE;
4578   (*mat)->preallocated = PETSC_TRUE;
4579 
4580   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4581   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr);
4582   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr);
4583 
4584   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4585   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4586 
4587   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4588   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4589   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4590   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4591 
4592   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4593   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4594   PetscFunctionReturn(0);
4595 }
4596 
4597 /*
4598     Special version for direct calls from Fortran
4599 */
4600 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4601 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4602 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4603 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4604 #endif
4605 
4606 /* Change these macros so they can be used in a void function */
4607 #undef CHKERRQ
4608 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4609 #undef SETERRQ2
4610 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4611 #undef SETERRQ
4612 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4613 
4614 EXTERN_C_BEGIN
4615 #undef __FUNCT__
4616 #define __FUNCT__ "matsetvaluesmpiaij_"
4617 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4618 {
4619   Mat            mat = *mmat;
4620   PetscInt       m = *mm, n = *mn;
4621   InsertMode     addv = *maddv;
4622   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4623   PetscScalar    value;
4624   PetscErrorCode ierr;
4625 
4626   MatPreallocated(mat);
4627   if (mat->insertmode == NOT_SET_VALUES) {
4628     mat->insertmode = addv;
4629   }
4630 #if defined(PETSC_USE_DEBUG)
4631   else if (mat->insertmode != addv) {
4632     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4633   }
4634 #endif
4635   {
4636   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4637   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4638   PetscTruth     roworiented = aij->roworiented;
4639 
4640   /* Some Variables required in the macro */
4641   Mat            A = aij->A;
4642   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4643   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4644   PetscScalar    *aa = a->a;
4645   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4646   Mat            B = aij->B;
4647   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4648   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4649   PetscScalar    *ba = b->a;
4650 
4651   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4652   PetscInt       nonew = a->nonew;
4653   PetscScalar    *ap1,*ap2;
4654 
4655   PetscFunctionBegin;
4656   for (i=0; i<m; i++) {
4657     if (im[i] < 0) continue;
4658 #if defined(PETSC_USE_DEBUG)
4659     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4660 #endif
4661     if (im[i] >= rstart && im[i] < rend) {
4662       row      = im[i] - rstart;
4663       lastcol1 = -1;
4664       rp1      = aj + ai[row];
4665       ap1      = aa + ai[row];
4666       rmax1    = aimax[row];
4667       nrow1    = ailen[row];
4668       low1     = 0;
4669       high1    = nrow1;
4670       lastcol2 = -1;
4671       rp2      = bj + bi[row];
4672       ap2      = ba + bi[row];
4673       rmax2    = bimax[row];
4674       nrow2    = bilen[row];
4675       low2     = 0;
4676       high2    = nrow2;
4677 
4678       for (j=0; j<n; j++) {
4679         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4680         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4681         if (in[j] >= cstart && in[j] < cend){
4682           col = in[j] - cstart;
4683           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4684         } else if (in[j] < 0) continue;
4685 #if defined(PETSC_USE_DEBUG)
4686         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4687 #endif
4688         else {
4689           if (mat->was_assembled) {
4690             if (!aij->colmap) {
4691               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4692             }
4693 #if defined (PETSC_USE_CTABLE)
4694             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4695 	    col--;
4696 #else
4697             col = aij->colmap[in[j]] - 1;
4698 #endif
4699             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4700               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4701               col =  in[j];
4702               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4703               B = aij->B;
4704               b = (Mat_SeqAIJ*)B->data;
4705               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4706               rp2      = bj + bi[row];
4707               ap2      = ba + bi[row];
4708               rmax2    = bimax[row];
4709               nrow2    = bilen[row];
4710               low2     = 0;
4711               high2    = nrow2;
4712               bm       = aij->B->rmap.n;
4713               ba = b->a;
4714             }
4715           } else col = in[j];
4716           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4717         }
4718       }
4719     } else {
4720       if (!aij->donotstash) {
4721         if (roworiented) {
4722           if (ignorezeroentries && v[i*n] == 0.0) continue;
4723           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4724         } else {
4725           if (ignorezeroentries && v[i] == 0.0) continue;
4726           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4727         }
4728       }
4729     }
4730   }}
4731   PetscFunctionReturnVoid();
4732 }
4733 EXTERN_C_END
4734 
4735