xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 290bbb0a1dcfb34dbf94efcfcc44171581b0efea)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local column number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is defined this mapping is
10 scalable, at a slightly higher hash-table lookup cost; without it the mapping
11 is not scalable (each processor stores an order-N integer array) but is fast to access.
12 */
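/*
  For illustration, looking a global column number up in the resulting map
  (this mirrors the code in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
  below; gcol and lcol are hypothetical names, not part of the API):

#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
    lcol--;                      // entries are stored shifted by one so 0 can mean "absent"
#else
    lcol = aij->colmap[gcol] - 1;
#endif
    // lcol < 0 means gcol is not a column of the off-diagonal part aij->B
*/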
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
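/*
  Both macros above implement the same sorted-row insertion used by
  MatSetValues_MPIAIJ(): a short bisection narrows the search window, a linear
  scan looks for the column, and on a miss the tail of the row is shifted up
  one slot to make room (after reallocating if the row is full).  A minimal
  stand-alone sketch of that pattern, ignoring reallocation and the nonew /
  ignorezeroentries options (the names rowcols, rowvals and nrow are
  assumptions for illustration, not PETSc API):

    static void InsertSorted(PetscInt *rowcols,PetscScalar *rowvals,PetscInt *nrow,
                             PetscInt col,PetscScalar value,InsertMode addv)
    {
      PetscInt lo = 0,hi = *nrow,t,i,k;
      while (hi-lo > 5) {                        // bisection narrows the window
        t = (lo+hi)/2;
        if (rowcols[t] > col) hi = t; else lo = t;
      }
      for (i=lo; i<hi; i++) {
        if (rowcols[i] > col) break;             // found the insertion point
        if (rowcols[i] == col) {                 // column already present
          if (addv == ADD_VALUES) rowvals[i] += value; else rowvals[i] = value;
          return;
        }
      }
      for (k=*nrow; k>i; k--) {                  // shift later entries up one slot
        rowcols[k] = rowcols[k-1]; rowvals[k] = rowvals[k-1];
      }
      rowcols[i] = col; rowvals[i] = value; (*nrow)++;
    }
*/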
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
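/*
  Note on the routine above: the caller supplies the values of one locally
  owned row in global column order, so the copies are split into three pieces:
  the l off-diagonal (B) entries whose global columns precede the diagonal
  block, then every entry of the diagonal block (A), then the remaining
  off-diagonal (B) entries to the right of the diagonal block.
*/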
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
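/*
  Typical calling pattern for the routine above, through the public API
  (a minimal sketch; mat is an already preallocated MATMPIAIJ matrix and the
  indices and values are made up for illustration):

    PetscInt       row = 3,cols[2] = {0,7};
    PetscScalar    vals[2] = {1.0,-2.0};
    PetscErrorCode ierr;
    ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    // rows owned by other processes are stashed above and communicated during assembly
    ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/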
237 
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatGetValues_MPIAIJ"
241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
242 {
243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244   PetscErrorCode ierr;
245   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
246   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
247 
248   PetscFunctionBegin;
249   for (i=0; i<m; i++) {
250     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
251     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
252     if (idxm[i] >= rstart && idxm[i] < rend) {
253       row = idxm[i] - rstart;
254       for (j=0; j<n; j++) {
255         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
256         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
257         if (idxn[j] >= cstart && idxn[j] < cend){
258           col = idxn[j] - cstart;
259           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
260         } else {
261           if (!aij->colmap) {
262             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
263           }
264 #if defined (PETSC_USE_CTABLE)
265           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
266           col --;
267 #else
268           col = aij->colmap[idxn[j]] - 1;
269 #endif
270           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
271           else {
272             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
273           }
274         }
275       }
276     } else {
277       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
278     }
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
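/*
  The MPI_BOR reduction above works because INSERT_VALUES and ADD_VALUES are
  distinct nonzero values in the InsertMode enum: if every process used the
  same mode the reduction returns that mode, while a mix yields
  (ADD_VALUES|INSERT_VALUES), which matches neither and triggers the error.
  A process that set no values contributes an unset (zero) mode and simply
  adopts the reduced result, which is why mat->insertmode is reassigned.
*/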
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
315   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
316   PetscErrorCode ierr;
317   PetscMPIInt    n;
318   PetscInt       i,j,rstart,ncols,flg;
319   PetscInt       *row,*col,other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324   PetscFunctionBegin;
325   if (!aij->donotstash) {
326     while (1) {
327       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
328       if (!flg) break;
329 
330       for (i=0; i<n;) {
331         /* Now identify the consecutive vals belonging to the same row */
332         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
333         if (j < n) ncols = j-i;
334         else       ncols = n-i;
335         /* Now assemble all these values with a single function call */
336         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
337         i = j;
338       }
339     }
340     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
341   }
342   a->compressedrow.use     = PETSC_FALSE;
343   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
345 
346   /* determine if any processor has disassembled; if so, we must
347      also disassemble ourselves so that we may reassemble. */
348   /*
349      if the nonzero structure of the submatrix B cannot change then we know that
350      no processor disassembled, so we can skip this step
351   */
352   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
353     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
354     if (mat->was_assembled && !other_disassembled) {
355       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
356     }
357   }
358   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
359     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
360   }
361   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
362   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
363   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
364   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
365 
366   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
367   aij->rowvalues = 0;
368 
369   /* used by MatAXPY() */
370   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
371   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
372 
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
379 {
380   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
385   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatZeroRows_MPIAIJ"
391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
392 {
393   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394   PetscErrorCode ierr;
395   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
396   PetscInt       i,*owners = A->rmap.range;
397   PetscInt       *nprocs,j,idx,nsends,row;
398   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
399   PetscInt       *rvalues,count,base,slen,*source;
400   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
401   MPI_Comm       comm = A->comm;
402   MPI_Request    *send_waits,*recv_waits;
403   MPI_Status     recv_status,*send_status;
404 #if defined(PETSC_DEBUG)
405   PetscTruth     found = PETSC_FALSE;
406 #endif
407 
408   PetscFunctionBegin;
409   /*  first count number of contributors to each processor */
410   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
411   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
412   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
413   j = 0;
414   for (i=0; i<N; i++) {
415     if (lastidx > (idx = rows[i])) j = 0;
416     lastidx = idx;
417     for (; j<size; j++) {
418       if (idx >= owners[j] && idx < owners[j+1]) {
419         nprocs[2*j]++;
420         nprocs[2*j+1] = 1;
421         owner[i] = j;
422 #if defined(PETSC_DEBUG)
423         found = PETSC_TRUE;
424 #endif
425         break;
426       }
427     }
428 #if defined(PETSC_DEBUG)
429     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
430     found = PETSC_FALSE;
431 #endif
432   }
433   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434 
435   /* inform other processors of number of messages and max length*/
436   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
437 
438   /* post receives:   */
439   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
440   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
441   for (i=0; i<nrecvs; i++) {
442     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
443   }
444 
445   /* do sends:
446       1) starts[i] gives the starting index in svalues for stuff going to
447          the ith processor
448   */
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
450   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
452   starts[0] = 0;
453   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
454   for (i=0; i<N; i++) {
455     svalues[starts[owner[i]]++] = rows[i];
456   }
457 
458   starts[0] = 0;
459   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
460   count = 0;
461   for (i=0; i<size; i++) {
462     if (nprocs[2*i+1]) {
463       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
464     }
465   }
466   ierr = PetscFree(starts);CHKERRQ(ierr);
467 
468   base = owners[rank];
469 
470   /*  wait on receives */
471   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
472   source = lens + nrecvs;
473   count  = nrecvs; slen = 0;
474   while (count) {
475     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
476     /* unpack receives into our local space */
477     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
478     source[imdex]  = recv_status.MPI_SOURCE;
479     lens[imdex]    = n;
480     slen          += n;
481     count--;
482   }
483   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
484 
485   /* unpack the received global row numbers into local row numbers in lrows[] */
486   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
487   count = 0;
488   for (i=0; i<nrecvs; i++) {
489     values = rvalues + i*nmax;
490     for (j=0; j<lens[i]; j++) {
491       lrows[count++] = values[j] - base;
492     }
493   }
494   ierr = PetscFree(rvalues);CHKERRQ(ierr);
495   ierr = PetscFree(lens);CHKERRQ(ierr);
496   ierr = PetscFree(owner);CHKERRQ(ierr);
497   ierr = PetscFree(nprocs);CHKERRQ(ierr);
498 
499   /* actually zap the local rows */
500   /*
501         Zero the required rows. If the "diagonal block" of the matrix
502      is square and the user wishes to set the diagonal we use separate
503      code so that MatSetValues() is not called for each diagonal entry, which
504      would allocate new memory repeatedly (many mallocs) and slow things down.
505 
506        Contributed by: Matthew Knepley
507   */
508   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
509   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
510   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
511     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
512   } else if (diag != 0.0) {
513     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
514     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
515       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
517     }
518     for (i = 0; i < slen; i++) {
519       row  = lrows[i] + rstart;
520       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
521     }
522     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
524   } else {
525     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
526   }
527   ierr = PetscFree(lrows);CHKERRQ(ierr);
528 
529   /* wait on sends */
530   if (nsends) {
531     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
532     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
533     ierr = PetscFree(send_status);CHKERRQ(ierr);
534   }
535   ierr = PetscFree(send_waits);CHKERRQ(ierr);
536   ierr = PetscFree(svalues);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
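/*
  A minimal sketch of invoking the routine above through the public interface
  (the row numbers are global and made up for illustration; rows owned by
  other processes are forwarded by the sends and receives above):

    PetscInt       rows[2] = {0,5};
    PetscErrorCode ierr;
    ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);   // zero rows 0 and 5, put 1.0 on their diagonals
*/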
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_MPIAIJ"
543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
544 {
545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546   PetscErrorCode ierr;
547   PetscInt       nt;
548 
549   PetscFunctionBegin;
550   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
551   if (nt != A->cmap.n) {
552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
553   }
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
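/*
  The routine above overlaps communication with computation: the forward
  scatter that gathers the needed ghost values of xx into a->lvec is started,
  the diagonal-block product y = A_d * x_local is computed while the messages
  are in flight, and once the scatter completes the off-diagonal contribution
  is added, y += B_o * x_ghost.  (A_d and B_o denote the local "diagonal" and
  "off-diagonal" blocks stored in a->A and a->B.)
*/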
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultAdd_MPIAIJ"
563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
570   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
571   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscTruth     merged;
583 
584   PetscFunctionBegin;
585   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
586   /* do nondiagonal part */
587   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
588   if (!merged) {
589     /* send it on its way */
590     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
591     /* do local part */
592     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
593     /* receive remote parts: note this assumes the values are not actually */
594     /* added into yy until the VecScatterEnd() on the next line */
595     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
596   } else {
597     /* do local part */
598     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
599     /* send it on its way */
600     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
601     /* the values were actually received in the VecScatterBegin() above, but this (no-op) call is still required */
602     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
611 {
612   MPI_Comm       comm;
613   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
614   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
615   IS             Me,Notme;
616   PetscErrorCode ierr;
617   PetscInt       M,N,first,last,*notme,i;
618   PetscMPIInt    size;
619 
620   PetscFunctionBegin;
621 
622   /* Easy test: symmetric diagonal block */
623   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
624   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
625   if (!*f) PetscFunctionReturn(0);
626   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
628   if (size == 1) PetscFunctionReturn(0);
629 
630   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
631   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
632   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
633   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
634   for (i=0; i<first; i++) notme[i] = i;
635   for (i=last; i<M; i++) notme[i-last+first] = i;
636   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
637   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
638   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
639   Aoff = Aoffs[0];
640   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
641   Boff = Boffs[0];
642   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
644   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
645   ierr = ISDestroy(Me);CHKERRQ(ierr);
646   ierr = ISDestroy(Notme);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 EXTERN_C_END
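/*
  The transpose test above proceeds in two stages: first the cheap local check
  that each matrix's diagonal block is the transpose of the other's, then
  (only if that passes, and only on more than one process) the complementary
  off-process column sets are extracted with MatGetSubMatrices() so that
  A(Me,Notme) can be tested against B(Notme,Me).
*/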
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   /* do nondiagonal part */
661   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
662   /* send it on its way */
663   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
664   /* do local part */
665   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
666   /* receive remote parts */
667   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672   This only works correctly for square matrices where the subblock A->A is the
673    diagonal block
674 */
675 #undef __FUNCT__
676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
678 {
679   PetscErrorCode ierr;
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681 
682   PetscFunctionBegin;
683   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
684   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
685     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
686   }
687   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatScale_MPIAIJ"
693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
694 {
695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
700   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatDestroy_MPIAIJ"
706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
707 {
708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712 #if defined(PETSC_USE_LOG)
713   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
714 #endif
715   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
717   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
718 #if defined (PETSC_USE_CTABLE)
719   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
720 #else
721   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
722 #endif
723   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
724   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
725   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
726   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatView_MPIAIJ_Binary"
741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
742 {
743   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
744   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
745   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
746   PetscErrorCode    ierr;
747   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
748   int               fd;
749   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
750   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
751   PetscScalar       *column_values;
752 
753   PetscFunctionBegin;
754   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
756   nz   = A->nz + B->nz;
757   if (!rank) {
758     header[0] = MAT_FILE_COOKIE;
759     header[1] = mat->rmap.N;
760     header[2] = mat->cmap.N;
761     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
762     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
763     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
764     /* get largest number of rows any processor has */
765     rlen = mat->rmap.n;
766     range = mat->rmap.range;
767     for (i=1; i<size; i++) {
768       rlen = PetscMax(rlen,range[i+1] - range[i]);
769     }
770   } else {
771     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     rlen = mat->rmap.n;
773   }
774 
775   /* load up the local row counts */
776   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
777   for (i=0; i<mat->rmap.n; i++) {
778     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
779   }
780 
781   /* store the row lengths to the file */
782   if (!rank) {
783     MPI_Status status;
784     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
785     for (i=1; i<size; i++) {
786       rlen = range[i+1] - range[i];
787       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
788       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     }
790   } else {
791     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
792   }
793   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
794 
795   /* load up the local column indices */
796   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
797   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
798   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
799   cnt  = 0;
800   for (i=0; i<mat->rmap.n; i++) {
801     for (j=B->i[i]; j<B->i[i+1]; j++) {
802       if ( (col = garray[B->j[j]]) > cstart) break;
803       column_indices[cnt++] = col;
804     }
805     for (k=A->i[i]; k<A->i[i+1]; k++) {
806       column_indices[cnt++] = A->j[k] + cstart;
807     }
808     for (; j<B->i[i+1]; j++) {
809       column_indices[cnt++] = garray[B->j[j]];
810     }
811   }
812   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
813 
814   /* store the column indices to the file */
815   if (!rank) {
816     MPI_Status status;
817     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     for (i=1; i<size; i++) {
819       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
820       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
821       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
823     }
824   } else {
825     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
826     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827   }
828   ierr = PetscFree(column_indices);CHKERRQ(ierr);
829 
830   /* load up the local column values */
831   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
832   cnt  = 0;
833   for (i=0; i<mat->rmap.n; i++) {
834     for (j=B->i[i]; j<B->i[i+1]; j++) {
835       if ( garray[B->j[j]] > cstart) break;
836       column_values[cnt++] = B->a[j];
837     }
838     for (k=A->i[i]; k<A->i[i+1]; k++) {
839       column_values[cnt++] = A->a[k];
840     }
841     for (; j<B->i[i+1]; j++) {
842       column_values[cnt++] = B->a[j];
843     }
844   }
845   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
846 
847   /* store the column values to the file */
848   if (!rank) {
849     MPI_Status status;
850     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
851     for (i=1; i<size; i++) {
852       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
853       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
854       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
856     }
857   } else {
858     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
859     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(column_values);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
868 {
869   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
870   PetscErrorCode    ierr;
871   PetscMPIInt       rank = aij->rank,size = aij->size;
872   PetscTruth        isdraw,iascii,isbinary;
873   PetscViewer       sviewer;
874   PetscViewerFormat format;
875 
876   PetscFunctionBegin;
877   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
880   if (iascii) {
881     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
882     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
883       MatInfo    info;
884       PetscTruth inodes;
885 
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
889       if (!inodes) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
891 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
894 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscInt   inodecount,inodelimit,*inodes;
905       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
906       if (inodes) {
907         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
908       } else {
909         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
910       }
911       PetscFunctionReturn(0);
912     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
913       PetscFunctionReturn(0);
914     }
915   } else if (isbinary) {
916     if (size == 1) {
917       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919     } else {
920       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
921     }
922     PetscFunctionReturn(0);
923   } else if (isdraw) {
924     PetscDraw  draw;
925     PetscTruth isnull;
926     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
927     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
928   }
929 
930   if (size == 1) {
931     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
932     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
933   } else {
934     /* assemble the entire matrix onto first processor. */
935     Mat         A;
936     Mat_SeqAIJ  *Aloc;
937     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
938     PetscScalar *a;
939 
940     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
941     if (!rank) {
942       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
943     } else {
944       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = mat->rmap.rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = mat->rmap.rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Everyone has to take part in the call that draws the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth     iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
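/*
  In each outer iteration above the ghost values of xx are gathered into
  mat->lvec, the right-hand side is updated as bb1 = bb - B_o * x_ghost, and a
  purely local (sequential) SOR sweep is applied to the diagonal block with
  bb1 as its right-hand side.  The parallel scheme is therefore block Jacobi
  across processes with SOR sweeps inside each block; a true parallel SOR is
  not supported, as the final SETERRQ() states.
*/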
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatPermute_MPIAIJ"
1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090 {
1091   MPI_Comm       comm,pcomm;
1092   PetscInt       first,local_size,nrows,*rows;
1093   int            ntids;
1094   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1099   /* make a collective version of 'rowp' */
1100   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1101   if (pcomm==comm) {
1102     crowp = rowp;
1103   } else {
1104     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1105     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1107     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1108   }
1109   /* collect the global row permutation and invert it */
1110   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1111   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1112   if (pcomm!=comm) {
1113     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1114   }
1115   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1116   /* get the local target indices */
1117   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1123   /* the column permutation is so much easier;
1124      make a local version of 'colp' and invert it */
1125   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1127   if (ntids==1) {
1128     lcolp = colp;
1129   } else {
1130     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1131     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1133   }
1134   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1135   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1136   if (ntids>1) {
1137     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1138     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1139   }
1140   /* now we just get the submatrix */
1141   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1142   /* clean up */
1143   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1144   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.N;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NO_NEW_NONZERO_LOCATIONS:
1207   case MAT_YES_NEW_NONZERO_LOCATIONS:
1208   case MAT_COLUMNS_UNSORTED:
1209   case MAT_COLUMNS_SORTED:
1210   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1211   case MAT_KEEP_ZEROED_ROWS:
1212   case MAT_NEW_NONZERO_LOCATION_ERR:
1213   case MAT_USE_INODES:
1214   case MAT_DO_NOT_USE_INODES:
1215   case MAT_IGNORE_ZERO_ENTRIES:
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1218     break;
1219   case MAT_ROW_ORIENTED:
1220     a->roworiented = PETSC_TRUE;
1221     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1222     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1223     break;
1224   case MAT_ROWS_SORTED:
1225   case MAT_ROWS_UNSORTED:
1226   case MAT_YES_NEW_DIAGONALS:
1227     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1228     break;
1229   case MAT_COLUMN_ORIENTED:
1230     a->roworiented = PETSC_FALSE;
1231     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1232     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1233     break;
1234   case MAT_IGNORE_OFF_PROC_ENTRIES:
1235     a->donotstash = PETSC_TRUE;
1236     break;
1237   case MAT_NO_NEW_DIAGONALS:
1238     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1239   case MAT_SYMMETRIC:
1240     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1241     break;
1242   case MAT_STRUCTURALLY_SYMMETRIC:
1243   case MAT_HERMITIAN:
1244   case MAT_SYMMETRY_ETERNAL:
1245     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1246     break;
1247   case MAT_NOT_SYMMETRIC:
1248   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1249   case MAT_NOT_HERMITIAN:
1250   case MAT_NOT_SYMMETRY_ETERNAL:
1251     break;
1252   default:
1253     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatGetRow_MPIAIJ"
1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1263   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264   PetscErrorCode ierr;
1265   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1266   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1267   PetscInt       *cmap,*idx_p;
1268 
1269   PetscFunctionBegin;
1270   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271   mat->getrowactive = PETSC_TRUE;
1272 
1273   if (!mat->rowvalues && (idx || v)) {
1274     /*
1275         allocate enough space to hold information from the longest row.
1276     */
1277     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1278     PetscInt     max = 1,tmp;
1279     for (i=0; i<matin->rmap.n; i++) {
1280       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281       if (max < tmp) { max = tmp; }
1282     }
1283     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1284     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1285   }
1286 
1287   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1288   lrow = row - rstart;
1289 
1290   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291   if (!v)   {pvA = 0; pvB = 0;}
1292   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295   nztot = nzA + nzB;
1296 
1297   cmap  = mat->garray;
1298   if (v  || idx) {
1299     if (nztot) {
1300       /* Sort by increasing column numbers, assuming A and B already sorted */
1301       PetscInt imark = -1;
1302       if (v) {
1303         *v = v_p = mat->rowvalues;
1304         for (i=0; i<nzB; i++) {
1305           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1306           else break;
1307         }
1308         imark = i;
1309         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311       }
1312       if (idx) {
1313         *idx = idx_p = mat->rowindices;
1314         if (imark > -1) {
1315           for (i=0; i<imark; i++) {
1316             idx_p[i] = cmap[cworkB[i]];
1317           }
1318         } else {
1319           for (i=0; i<nzB; i++) {
1320             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1321             else break;
1322           }
1323           imark = i;
1324         }
1325         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1326         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1327       }
1328     } else {
1329       if (idx) *idx = 0;
1330       if (v)   *v   = 0;
1331     }
1332   }
1333   *nz = nztot;
1334   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1335   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
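/*
  A minimal sketch of using the routine above through the public API (row must
  be owned by this process; the output arrays belong to the matrix and must be
  returned with MatRestoreRow()):

    PetscInt       ncols,*cols;
    PetscScalar    *vals;
    PetscErrorCode ierr;
    ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    // ... read ncols, cols[] (global column numbers, ascending) and vals[] ...
    ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/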
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!aij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1348   }
1349   aij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatNorm_MPIAIJ"
1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1356 {
1357   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1358   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1359   PetscErrorCode ierr;
1360   PetscInt       i,j,cstart = mat->cmap.rstart;
1361   PetscReal      sum = 0.0;
1362   PetscScalar    *v;
1363 
1364   PetscFunctionBegin;
1365   if (aij->size == 1) {
1366     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1367   } else {
1368     if (type == NORM_FROBENIUS) {
1369       v = amat->a;
1370       for (i=0; i<amat->nz; i++) {
1371 #if defined(PETSC_USE_COMPLEX)
1372         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1373 #else
1374         sum += (*v)*(*v); v++;
1375 #endif
1376       }
1377       v = bmat->a;
1378       for (i=0; i<bmat->nz; i++) {
1379 #if defined(PETSC_USE_COMPLEX)
1380         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1381 #else
1382         sum += (*v)*(*v); v++;
1383 #endif
1384       }
1385       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1386       *norm = sqrt(*norm);
1387     } else if (type == NORM_1) { /* max column norm */
1388       PetscReal *tmp,*tmp2;
1389       PetscInt    *jj,*garray = aij->garray;
1390       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1392       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1393       *norm = 0.0;
1394       v = amat->a; jj = amat->j;
1395       for (j=0; j<amat->nz; j++) {
1396         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1397       }
1398       v = bmat->a; jj = bmat->j;
1399       for (j=0; j<bmat->nz; j++) {
1400         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1401       }
1402       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1403       for (j=0; j<mat->cmap.N; j++) {
1404         if (tmp2[j] > *norm) *norm = tmp2[j];
1405       }
1406       ierr = PetscFree(tmp);CHKERRQ(ierr);
1407       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1408     } else if (type == NORM_INFINITY) { /* max row norm */
1409       PetscReal ntemp = 0.0;
1410       for (j=0; j<aij->A->rmap.n; j++) {
1411         v = amat->a + amat->i[j];
1412         sum = 0.0;
1413         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1414           sum += PetscAbsScalar(*v); v++;
1415         }
1416         v = bmat->a + bmat->i[j];
1417         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1418           sum += PetscAbsScalar(*v); v++;
1419         }
1420         if (sum > ntemp) ntemp = sum;
1421       }
1422       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1423     } else {
1424       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1425     }
1426   }
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatTranspose_MPIAIJ"
1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1433 {
1434   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1435   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1436   PetscErrorCode ierr;
1437   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1438   Mat            B;
1439   PetscScalar    *array;
1440 
1441   PetscFunctionBegin;
1442   if (!matout && M != N) {
1443     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1444   }
1445 
1446   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1447   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1448   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1449   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1450 
1451   /* copy over the A part */
1452   Aloc = (Mat_SeqAIJ*)a->A->data;
1453   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1454   row = A->rmap.rstart;
1455   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1456   for (i=0; i<m; i++) {
1457     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1458     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1459   }
1460   aj = Aloc->j;
1461   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1462 
1463   /* copy over the B part */
1464   Aloc = (Mat_SeqAIJ*)a->B->data;
1465   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1466   row  = A->rmap.rstart;
1467   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1468   ct   = cols;
1469   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1470   for (i=0; i<m; i++) {
1471     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1472     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1473   }
1474   ierr = PetscFree(ct);CHKERRQ(ierr);
1475   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1476   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1477   if (matout) {
1478     *matout = B;
1479   } else {
1480     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1487 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1488 {
1489   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1490   Mat            a = aij->A,b = aij->B;
1491   PetscErrorCode ierr;
1492   PetscInt       s1,s2,s3;
1493 
1494   PetscFunctionBegin;
1495   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1496   if (rr) {
1497     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1498     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1499     /* Overlap communication with computation. */
1500     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1501   }
1502   if (ll) {
1503     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1504     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1505     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1506   }
1507   /* scale  the diagonal block */
1508   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1509 
1510   if (rr) {
1511     /* Do a scatter end and then right scale the off-diagonal block */
1512     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1513     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1514   }
1515 
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1521 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1522 {
1523   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1524   PetscErrorCode ierr;
1525 
1526   PetscFunctionBegin;
1527   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1528   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1533 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1534 {
1535   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1536   PetscErrorCode ierr;
1537 
1538   PetscFunctionBegin;
1539   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatEqual_MPIAIJ"
1545 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1546 {
1547   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1548   Mat            a,b,c,d;
1549   PetscTruth     flg;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   a = matA->A; b = matA->B;
1554   c = matB->A; d = matB->B;
1555 
1556   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1557   if (flg) {
1558     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1559   }
1560   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatCopy_MPIAIJ"
1566 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1567 {
1568   PetscErrorCode ierr;
1569   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1570   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1571 
1572   PetscFunctionBegin;
1573   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1574   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1575     /* Because of the column compression in the off-processor part of the matrix a->B,
1576        the number of columns in a->B and b->B may be different; hence we cannot call
1577        MatCopy() directly on the two parts. If need be, a copy more efficient than
1578        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1579        then copying the submatrices. */
1580     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1581   } else {
1582     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1583     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1590 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1591 {
1592   PetscErrorCode ierr;
1593 
1594   PetscFunctionBegin;
1595   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 #include "petscblaslapack.h"
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatAXPY_MPIAIJ"
1602 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1603 {
1604   PetscErrorCode ierr;
1605   PetscInt       i;
1606   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1607   PetscBLASInt   bnz,one=1;
1608   Mat_SeqAIJ     *x,*y;
1609 
1610   PetscFunctionBegin;
1611   if (str == SAME_NONZERO_PATTERN) {
1612     PetscScalar alpha = a;
1613     x = (Mat_SeqAIJ *)xx->A->data;
1614     y = (Mat_SeqAIJ *)yy->A->data;
1615     bnz = (PetscBLASInt)x->nz;
1616     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1617     x = (Mat_SeqAIJ *)xx->B->data;
1618     y = (Mat_SeqAIJ *)yy->B->data;
1619     bnz = (PetscBLASInt)x->nz;
1620     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1621   } else if (str == SUBSET_NONZERO_PATTERN) {
1622     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1623 
1624     x = (Mat_SeqAIJ *)xx->B->data;
1625     y = (Mat_SeqAIJ *)yy->B->data;
1626     if (y->xtoy && y->XtoY != xx->B) {
1627       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1628       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1629     }
1630     if (!y->xtoy) { /* get xtoy */
1631       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1632       y->XtoY = xx->B;
1633     }
1634     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1635   } else {
1636     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1637   }
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1642 
1643 #undef __FUNCT__
1644 #define __FUNCT__ "MatConjugate_MPIAIJ"
1645 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1646 {
1647 #if defined(PETSC_USE_COMPLEX)
1648   PetscErrorCode ierr;
1649   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1650 
1651   PetscFunctionBegin;
1652   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1653   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1654 #else
1655   PetscFunctionBegin;
1656 #endif
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatRealPart_MPIAIJ"
1662 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1663 {
1664   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1665   PetscErrorCode ierr;
1666 
1667   PetscFunctionBegin;
1668   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1669   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1675 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1676 {
1677   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1678   PetscErrorCode ierr;
1679 
1680   PetscFunctionBegin;
1681   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1682   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #ifdef PETSC_HAVE_PBGL
1687 #include <boost/parallel/mpi/bsp_process_group.hpp>
1688 typedef boost::parallel::mpi::bsp_process_group            process_group_type;
1689 
1690 #include <boost/graph/distributed/adjacency_list.hpp>
1691 #include <boost/parallel/mpi/bsp_process_group.hpp>
1692 
1693 #include <boost/graph/distributed/petsc/interface.hpp>
1694 #include <boost/graph/distributed/ilu_0.hpp>
1695 
1696 namespace petsc = boost::distributed::petsc;
1697 using namespace std;
1698 typedef double                                             value_type;
1699 typedef boost::graph::distributed::ilu_elimination_state   elimination_state;
1700 typedef boost::adjacency_list<boost::listS,
1701                        boost::distributedS<process_group_type, boost::vecS>,
1702                        boost::bidirectionalS,
1703                        // Vertex properties
1704                        boost::no_property,
1705                        // Edge properties
1706                        boost::property<boost::edge_weight_t, value_type,
1707                          boost::property<boost::edge_finished_t, elimination_state> > > graph_type;
1708 
1709 typedef boost::graph_traits<graph_type>::vertex_descriptor        vertex_type;
1710 typedef boost::graph_traits<graph_type>::edge_descriptor          edge_type;
1711 typedef boost::property_map<graph_type, boost::edge_weight_t>::type      weight_map_type;
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1715 /*
1716   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1717 */
1718 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1719 {
1720   PetscTruth           row_identity, col_identity;
1721   PetscObjectContainer c;
1722   PetscInt             m, n, M, N;
1723   PetscErrorCode       ierr;
1724 
1725   PetscFunctionBegin;
1726   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1727   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1728   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1729   if (!row_identity || !col_identity) {
1730     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1731   }
1732 
1733   process_group_type pg;
1734   graph_type*        graph_p = new graph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1735   graph_type&        graph   = *graph_p;
1736   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1737 
1738   //write_graphviz("petsc_matrix_as_graph.dot", graph, default_writer(), matrix_graph_writer<graph_type>(graph));
1739   boost::property_map<graph_type, boost::edge_finished_t>::type finished = get(boost::edge_finished, graph);
1740   BGL_FORALL_EDGES(e, graph, graph_type)
1741     put(finished, e, boost::graph::distributed::unseen);
1742 
1743   ilu_0(graph, get(boost::edge_weight, graph), get(boost::edge_finished, graph));
1744 
1745   /* put together the new matrix */
1746   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1747   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1748   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1749   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1750   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1751   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1752   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1753   (*fact)->factor = FACTOR_LU;
1754 
1755   ierr = PetscObjectContainerCreate(A->comm, &c);CHKERRQ(ierr);
1756   ierr = PetscObjectContainerSetPointer(c, graph_p);CHKERRQ(ierr);
1757   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1763 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1764 {
1765   PetscFunctionBegin;
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatSolve_MPIAIJ"
1771 /*
1772   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1773 */
1774 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1775 {
1776   graph_type*          graph_p;
1777   PetscObjectContainer c;
1778   PetscErrorCode       ierr;
1779 
1780   PetscFunctionBegin;
1781   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1782   ierr = PetscObjectContainerGetPointer(c, (void **) &graph_p);CHKERRQ(ierr);
1783   ierr = VecCopy(b, x); CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 #endif
1787 
1788 /* -------------------------------------------------------------------*/
1789 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1790        MatGetRow_MPIAIJ,
1791        MatRestoreRow_MPIAIJ,
1792        MatMult_MPIAIJ,
1793 /* 4*/ MatMultAdd_MPIAIJ,
1794        MatMultTranspose_MPIAIJ,
1795        MatMultTransposeAdd_MPIAIJ,
1796 #ifdef PETSC_HAVE_PBGL
1797        MatSolve_MPIAIJ,
1798 #else
1799        0,
1800 #endif
1801        0,
1802        0,
1803 /*10*/ 0,
1804        0,
1805        0,
1806        MatRelax_MPIAIJ,
1807        MatTranspose_MPIAIJ,
1808 /*15*/ MatGetInfo_MPIAIJ,
1809        MatEqual_MPIAIJ,
1810        MatGetDiagonal_MPIAIJ,
1811        MatDiagonalScale_MPIAIJ,
1812        MatNorm_MPIAIJ,
1813 /*20*/ MatAssemblyBegin_MPIAIJ,
1814        MatAssemblyEnd_MPIAIJ,
1815        0,
1816        MatSetOption_MPIAIJ,
1817        MatZeroEntries_MPIAIJ,
1818 /*25*/ MatZeroRows_MPIAIJ,
1819        0,
1820 #ifdef PETSC_HAVE_PBGL
1821        MatLUFactorNumeric_MPIAIJ,
1822 #else
1823        0,
1824 #endif
1825        0,
1826        0,
1827 /*30*/ MatSetUpPreallocation_MPIAIJ,
1828 #ifdef PETSC_HAVE_PBGL
1829        MatILUFactorSymbolic_MPIAIJ,
1830 #else
1831        0,
1832 #endif
1833        0,
1834        0,
1835        0,
1836 /*35*/ MatDuplicate_MPIAIJ,
1837        0,
1838        0,
1839        0,
1840        0,
1841 /*40*/ MatAXPY_MPIAIJ,
1842        MatGetSubMatrices_MPIAIJ,
1843        MatIncreaseOverlap_MPIAIJ,
1844        MatGetValues_MPIAIJ,
1845        MatCopy_MPIAIJ,
1846 /*45*/ 0,
1847        MatScale_MPIAIJ,
1848        0,
1849        0,
1850        0,
1851 /*50*/ MatSetBlockSize_MPIAIJ,
1852        0,
1853        0,
1854        0,
1855        0,
1856 /*55*/ MatFDColoringCreate_MPIAIJ,
1857        0,
1858        MatSetUnfactored_MPIAIJ,
1859        MatPermute_MPIAIJ,
1860        0,
1861 /*60*/ MatGetSubMatrix_MPIAIJ,
1862        MatDestroy_MPIAIJ,
1863        MatView_MPIAIJ,
1864        0,
1865        0,
1866 /*65*/ 0,
1867        0,
1868        0,
1869        0,
1870        0,
1871 /*70*/ 0,
1872        0,
1873        MatSetColoring_MPIAIJ,
1874 #if defined(PETSC_HAVE_ADIC)
1875        MatSetValuesAdic_MPIAIJ,
1876 #else
1877        0,
1878 #endif
1879        MatSetValuesAdifor_MPIAIJ,
1880 /*75*/ 0,
1881        0,
1882        0,
1883        0,
1884        0,
1885 /*80*/ 0,
1886        0,
1887        0,
1888        0,
1889 /*84*/ MatLoad_MPIAIJ,
1890        0,
1891        0,
1892        0,
1893        0,
1894        0,
1895 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1896        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1897        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1898        MatPtAP_Basic,
1899        MatPtAPSymbolic_MPIAIJ,
1900 /*95*/ MatPtAPNumeric_MPIAIJ,
1901        0,
1902        0,
1903        0,
1904        0,
1905 /*100*/0,
1906        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1907        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1908        MatConjugate_MPIAIJ,
1909        0,
1910 /*105*/MatSetValuesRow_MPIAIJ,
1911        MatRealPart_MPIAIJ,
1912        MatImaginaryPart_MPIAIJ};
1913 
1914 /* ----------------------------------------------------------------------------------------*/
1915 
1916 EXTERN_C_BEGIN
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1919 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1920 {
1921   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1922   PetscErrorCode ierr;
1923 
1924   PetscFunctionBegin;
1925   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1926   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 EXTERN_C_END
1930 
1931 EXTERN_C_BEGIN
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1934 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1935 {
1936   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1937   PetscErrorCode ierr;
1938 
1939   PetscFunctionBegin;
1940   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1941   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 EXTERN_C_END
1945 
1946 #include "petscpc.h"
1947 EXTERN_C_BEGIN
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1950 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1951 {
1952   Mat_MPIAIJ     *b;
1953   PetscErrorCode ierr;
1954   PetscInt       i;
1955 
1956   PetscFunctionBegin;
1957   B->preallocated = PETSC_TRUE;
1958   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1959   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1960   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1961   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1962 
1963   B->rmap.bs = B->cmap.bs = 1;
1964   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1965   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1966   if (d_nnz) {
1967     for (i=0; i<B->rmap.n; i++) {
1968       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1969     }
1970   }
1971   if (o_nnz) {
1972     for (i=0; i<B->rmap.n; i++) {
1973       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1974     }
1975   }
1976   b = (Mat_MPIAIJ*)B->data;
1977 
1978   /* Explicitly create 2 MATSEQAIJ matrices. */
1979   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1980   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1981   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1982   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1983   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1984   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1985   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1986   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1987 
1988   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1989   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1990 
1991   PetscFunctionReturn(0);
1992 }
1993 EXTERN_C_END
1994 
1995 #undef __FUNCT__
1996 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1997 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1998 {
1999   Mat            mat;
2000   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2001   PetscErrorCode ierr;
2002 
2003   PetscFunctionBegin;
2004   *newmat       = 0;
2005   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2006   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2007   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2008   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2009   a    = (Mat_MPIAIJ*)mat->data;
2010 
2011   mat->factor       = matin->factor;
2012   mat->rmap.bs      = matin->rmap.bs;
2013   mat->assembled    = PETSC_TRUE;
2014   mat->insertmode   = NOT_SET_VALUES;
2015   mat->preallocated = PETSC_TRUE;
2016 
2017   a->size           = oldmat->size;
2018   a->rank           = oldmat->rank;
2019   a->donotstash     = oldmat->donotstash;
2020   a->roworiented    = oldmat->roworiented;
2021   a->rowindices     = 0;
2022   a->rowvalues      = 0;
2023   a->getrowactive   = PETSC_FALSE;
2024 
2025   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2026   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2027 
2028   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2029   if (oldmat->colmap) {
2030 #if defined (PETSC_USE_CTABLE)
2031     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2032 #else
2033     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2034     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2035     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2036 #endif
2037   } else a->colmap = 0;
2038   if (oldmat->garray) {
2039     PetscInt len;
2040     len  = oldmat->B->cmap.n;
2041     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2042     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2043     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2044   } else a->garray = 0;
2045 
2046   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2047   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2048   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2049   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2050   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2051   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2052   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2053   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2054   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2055   *newmat = mat;
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 #include "petscsys.h"
2060 
2061 #undef __FUNCT__
2062 #define __FUNCT__ "MatLoad_MPIAIJ"
2063 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2064 {
2065   Mat            A;
2066   PetscScalar    *vals,*svals;
2067   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2068   MPI_Status     status;
2069   PetscErrorCode ierr;
2070   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2071   PetscInt       i,nz,j,rstart,rend,mmax;
2072   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2073   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2074   PetscInt       cend,cstart,n,*rowners;
2075   int            fd;
2076 
2077   PetscFunctionBegin;
2078   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2079   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2080   if (!rank) {
2081     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2082     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2083     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2084   }
2085 
2086   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2087   M = header[1]; N = header[2];
2088   /* determine ownership of all rows */
2089   m    = M/size + ((M % size) > rank);
2090   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2091   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2092 
2093   /* First process needs enough room for process with most rows */
2094   if (!rank) {
2095     mmax       = rowners[1];
2096     for (i=2; i<size; i++) {
2097       mmax = PetscMax(mmax,rowners[i]);
2098     }
2099   } else mmax = m;
2100 
2101   rowners[0] = 0;
2102   for (i=2; i<=size; i++) {
2103     mmax       = PetscMax(mmax,rowners[i]);
2104     rowners[i] += rowners[i-1];
2105   }
2106   rstart = rowners[rank];
2107   rend   = rowners[rank+1];
2108 
2109   /* distribute row lengths to all processors */
2110   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2111   if (!rank) {
2112     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2113     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2114     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2115     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2116     for (j=0; j<m; j++) {
2117       procsnz[0] += ourlens[j];
2118     }
2119     for (i=1; i<size; i++) {
2120       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2121       /* calculate the number of nonzeros on each processor */
2122       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2123         procsnz[i] += rowlengths[j];
2124       }
2125       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2126     }
2127     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2128   } else {
2129     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2130   }
2131 
2132   if (!rank) {
2133     /* determine max buffer needed and allocate it */
2134     maxnz = 0;
2135     for (i=0; i<size; i++) {
2136       maxnz = PetscMax(maxnz,procsnz[i]);
2137     }
2138     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2139 
2140     /* read in my part of the matrix column indices  */
2141     nz   = procsnz[0];
2142     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2143     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2144 
2145     /* read in everyone else's part and ship it off */
2146     for (i=1; i<size; i++) {
2147       nz   = procsnz[i];
2148       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2149       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2150     }
2151     ierr = PetscFree(cols);CHKERRQ(ierr);
2152   } else {
2153     /* determine buffer space needed for message */
2154     nz = 0;
2155     for (i=0; i<m; i++) {
2156       nz += ourlens[i];
2157     }
2158     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2159 
2160     /* receive message of column indices */
2161     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2162     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2163     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2164   }
2165 
2166   /* determine column ownership if matrix is not square */
2167   if (N != M) {
2168     n      = N/size + ((N % size) > rank);
2169     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2170     cstart = cend - n;
2171   } else {
2172     cstart = rstart;
2173     cend   = rend;
2174     n      = cend - cstart;
2175   }
2176 
2177   /* loop over local rows, determining number of off diagonal entries */
2178   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2179   jj = 0;
2180   for (i=0; i<m; i++) {
2181     for (j=0; j<ourlens[i]; j++) {
2182       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2183       jj++;
2184     }
2185   }
2186 
2187   /* create our matrix */
2188   for (i=0; i<m; i++) {
2189     ourlens[i] -= offlens[i];
2190   }
2191   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2192   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2193   ierr = MatSetType(A,type);CHKERRQ(ierr);
2194   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2195 
2196   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2197   for (i=0; i<m; i++) {
2198     ourlens[i] += offlens[i];
2199   }
2200 
2201   if (!rank) {
2202     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2203 
2204     /* read in my part of the matrix numerical values  */
2205     nz   = procsnz[0];
2206     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2207 
2208     /* insert into matrix */
2209     jj      = rstart;
2210     smycols = mycols;
2211     svals   = vals;
2212     for (i=0; i<m; i++) {
2213       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2214       smycols += ourlens[i];
2215       svals   += ourlens[i];
2216       jj++;
2217     }
2218 
2219     /* read in other processors and ship out */
2220     for (i=1; i<size; i++) {
2221       nz   = procsnz[i];
2222       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2223       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2224     }
2225     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2226   } else {
2227     /* receive numeric values */
2228     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2229 
2230     /* receive message of values */
2231     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2232     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2233     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2234 
2235     /* insert into matrix */
2236     jj      = rstart;
2237     smycols = mycols;
2238     svals   = vals;
2239     for (i=0; i<m; i++) {
2240       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2241       smycols += ourlens[i];
2242       svals   += ourlens[i];
2243       jj++;
2244     }
2245   }
2246   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2247   ierr = PetscFree(vals);CHKERRQ(ierr);
2248   ierr = PetscFree(mycols);CHKERRQ(ierr);
2249   ierr = PetscFree(rowners);CHKERRQ(ierr);
2250 
2251   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2252   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2253   *newmat = A;
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2259 /*
2260     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2261   copy, and then the end result assembled by concatenating the local matrices.
2262   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2263 */
2264 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2265 {
2266   PetscErrorCode ierr;
2267   PetscMPIInt    rank,size;
2268   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2269   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2270   Mat            *local,M,Mreuse;
2271   PetscScalar    *vwork,*aa;
2272   MPI_Comm       comm = mat->comm;
2273   Mat_SeqAIJ     *aij;
2274 
2275 
2276   PetscFunctionBegin;
2277   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2278   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2279 
2280   if (call ==  MAT_REUSE_MATRIX) {
2281     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2282     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2283     local = &Mreuse;
2284     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2285   } else {
2286     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2287     Mreuse = *local;
2288     ierr   = PetscFree(local);CHKERRQ(ierr);
2289   }
2290 
2291   /*
2292       m - number of local rows
2293       n - number of columns (same on all processors)
2294       rstart - first row in new global matrix generated
2295   */
2296   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2297   if (call == MAT_INITIAL_MATRIX) {
2298     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2299     ii  = aij->i;
2300     jj  = aij->j;
2301 
2302     /*
2303         Determine the number of non-zeros in the diagonal and off-diagonal
2304         portions of the matrix in order to do correct preallocation
2305     */
2306 
2307     /* first get start and end of "diagonal" columns */
2308     if (csize == PETSC_DECIDE) {
2309       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2310       if (mglobal == n) { /* square matrix */
2311         nlocal = m;
2312       } else {
2313         nlocal = n/size + ((n % size) > rank);
2314       }
2315     } else {
2316       nlocal = csize;
2317     }
2318     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2319     rstart = rend - nlocal;
2320     if (rank == size - 1 && rend != n) {
2321       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2322     }
2323 
2324     /* next, compute all the lengths */
2325     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2326     olens = dlens + m;
2327     for (i=0; i<m; i++) {
2328       jend = ii[i+1] - ii[i];
2329       olen = 0;
2330       dlen = 0;
2331       for (j=0; j<jend; j++) {
2332         if (*jj < rstart || *jj >= rend) olen++;
2333         else dlen++;
2334         jj++;
2335       }
2336       olens[i] = olen;
2337       dlens[i] = dlen;
2338     }
2339     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2340     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2341     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2343     ierr = PetscFree(dlens);CHKERRQ(ierr);
2344   } else {
2345     PetscInt ml,nl;
2346 
2347     M = *newmat;
2348     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2349     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2350     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2351     /*
2352          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2353        rather than the slower MatSetValues().
2354     */
2355     M->was_assembled = PETSC_TRUE;
2356     M->assembled     = PETSC_FALSE;
2357   }
2358   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2359   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2360   ii  = aij->i;
2361   jj  = aij->j;
2362   aa  = aij->a;
2363   for (i=0; i<m; i++) {
2364     row   = rstart + i;
2365     nz    = ii[i+1] - ii[i];
2366     cwork = jj;     jj += nz;
2367     vwork = aa;     aa += nz;
2368     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2369   }
2370 
2371   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2372   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2373   *newmat = M;
2374 
2375   /* save submatrix used in processor for next request */
2376   if (call ==  MAT_INITIAL_MATRIX) {
2377     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2378     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2379   }
2380 
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 EXTERN_C_BEGIN
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2387 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2388 {
2389   PetscInt       m,cstart, cend,j,nnz,i,d;
2390   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2391   const PetscInt *JJ;
2392   PetscScalar    *values;
2393   PetscErrorCode ierr;
2394 
2395   PetscFunctionBegin;
2396   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, but it is %D",Ii[0]);
2397 
2398   B->rmap.bs = B->cmap.bs = 1;
2399   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2400   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2401   m      = B->rmap.n;
2402   cstart = B->cmap.rstart;
2403   cend   = B->cmap.rend;
2404   rstart = B->rmap.rstart;
2405 
2406   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2407   o_nnz = d_nnz + m;
2408 
2409   for (i=0; i<m; i++) {
2410     nnz     = Ii[i+1]- Ii[i];
2411     JJ      = J + Ii[i];
2412     nnz_max = PetscMax(nnz_max,nnz);
2413     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2414     for (j=0; j<nnz; j++) {
2415       if (*JJ >= cstart) break;
2416       JJ++;
2417     }
2418     d = 0;
2419     for (; j<nnz; j++) {
2420       if (*JJ++ >= cend) break;
2421       d++;
2422     }
2423     d_nnz[i] = d;
2424     o_nnz[i] = nnz - d;
2425   }
2426   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2427   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2428 
2429   if (v) values = (PetscScalar*)v;
2430   else {
2431     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2432     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2433   }
2434 
2435   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2436   for (i=0; i<m; i++) {
2437     ii   = i + rstart;
2438     nnz  = Ii[i+1]- Ii[i];
2439     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2440   }
2441   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2442   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2443   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2444 
2445   if (!v) {
2446     ierr = PetscFree(values);CHKERRQ(ierr);
2447   }
2448   PetscFunctionReturn(0);
2449 }
2450 EXTERN_C_END
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2454 /*@
2455    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2456    (the default parallel PETSc format).
2457 
2458    Collective on MPI_Comm
2459 
2460    Input Parameters:
2461 +  B - the matrix
2462 .  i - the indices into j for the start of each local row (starts with zero)
2463 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2464 -  v - optional values in the matrix
2465 
2466    Level: developer
2467 
2468    Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal
2469      storage format. Thus changing the values in a[] after this call will not effect the matrix values.
2470 
2471 .keywords: matrix, aij, compressed row, sparse, parallel
2472 
2473 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2474           MatCreateSeqAIJWithArrays()
2475 @*/
2476 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2477 {
2478   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2479 
2480   PetscFunctionBegin;
2481   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2482   if (f) {
2483     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2484   }
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 #undef __FUNCT__
2489 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2490 /*@C
2491    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2492    (the default parallel PETSc format).  For good matrix assembly performance
2493    the user should preallocate the matrix storage by setting the parameters
2494    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2495    performance can be increased by more than a factor of 50.
2496 
2497    Collective on MPI_Comm
2498 
2499    Input Parameters:
2500 +  A - the matrix
2501 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2502            (same value is used for all local rows)
2503 .  d_nnz - array containing the number of nonzeros in the various rows of the
2504            DIAGONAL portion of the local submatrix (possibly different for each row)
2505            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2506            The size of this array is equal to the number of local rows, i.e 'm'.
2507            You must leave room for the diagonal entry even if it is zero.
2508 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2509            submatrix (same value is used for all local rows).
2510 -  o_nnz - array containing the number of nonzeros in the various rows of the
2511            OFF-DIAGONAL portion of the local submatrix (possibly different for
2512            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2513            structure. The size of this array is equal to the number
2514            of local rows, i.e 'm'.
2515 
2516    If the *_nnz parameter is given then the *_nz parameter is ignored
2517 
2518    The AIJ format (also called the Yale sparse matrix format or
2519    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2520    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2521 
2522    The parallel matrix is partitioned such that the first m0 rows belong to
2523    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2524    to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on each process.
2525 
2526    The DIAGONAL portion of the local submatrix of a processor can be defined
2527    as the submatrix obtained by extracting the part corresponding
2528    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2529    first row that belongs to the processor, and r2 is the last row belonging
2530    to this processor. This is a square mxm matrix. The remaining portion
2531    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2532 
2533    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2534 
2535    Example usage:
2536 
2537    Consider the following 8x8 matrix with 34 non-zero values, that is
2538    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2539    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2540    as follows:
2541 
2542 .vb
2543             1  2  0  |  0  3  0  |  0  4
2544     Proc0   0  5  6  |  7  0  0  |  8  0
2545             9  0 10  | 11  0  0  | 12  0
2546     -------------------------------------
2547            13  0 14  | 15 16 17  |  0  0
2548     Proc1   0 18  0  | 19 20 21  |  0  0
2549             0  0  0  | 22 23  0  | 24  0
2550     -------------------------------------
2551     Proc2  25 26 27  |  0  0 28  | 29  0
2552            30  0  0  | 31 32 33  |  0 34
2553 .ve
2554 
2555    This can be represented as a collection of submatrices as:
2556 
2557 .vb
2558       A B C
2559       D E F
2560       G H I
2561 .ve
2562 
2563    Where the submatrices A,B,C are owned by proc0, D,E,F are
2564    owned by proc1, G,H,I are owned by proc2.
2565 
2566    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2567    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2568    The 'M','N' parameters are 8,8, and have the same values on all procs.
2569 
2570    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2571    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2572    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2573    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2574    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2575    matrix, and [DF] as another SeqAIJ matrix.
2576 
2577    When d_nz, o_nz parameters are specified, d_nz storage elements are
2578    allocated for every row of the local diagonal submatrix, and o_nz
2579    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2580    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2581    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2582    In this case, the values of d_nz,o_nz are:
2583 .vb
2584      proc0 : dnz = 2, o_nz = 2
2585      proc1 : dnz = 3, o_nz = 2
2586      proc2 : dnz = 1, o_nz = 4
2587 .ve
2588    We are allocating m*(d_nz+o_nz) storage locations for every process. This
2589    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2590    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2591    34 values.
2592 
2593    When d_nnz, o_nnz parameters are specified, the storage is specified
2594    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2595    In the above case the values for d_nnz,o_nnz are:
2596 .vb
2597      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2598      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2599      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2600 .ve
2601    Here the space allocated is the sum of all the above values, i.e. 34, and
2602    hence the preallocation is perfect.
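
   A sketch of the corresponding call on proc1 of the example above, assuming A has already
   been created with MatCreate(), MatSetSizes() and MatSetType(A,MATMPIAIJ):
.vb
     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve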
2603 
2604    Level: intermediate
2605 
2606 .keywords: matrix, aij, compressed row, sparse, parallel
2607 
2608 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2609           MPIAIJ
2610 @*/
2611 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2612 {
2613   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2614 
2615   PetscFunctionBegin;
2616   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2617   if (f) {
2618     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2619   }
2620   PetscFunctionReturn(0);
2621 }
2622 
2623 #undef __FUNCT__
2624 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
2625 /*@C
2626      MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local rows
2627          in standard CSR format.
2628 
2629    Collective on MPI_Comm
2630 
2631    Input Parameters:
2632 +  comm - MPI communicator
2633 .  m - number of local rows (Cannot be PETSC_DECIDE)
2634 .  n - This value should be the same as the local size used in creating the
2635        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2636        calculated if N is given) For square matrices n is almost always m.
2637 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2638 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2639 .   i - row indices
2640 .   j - column indices
2641 -   a - matrix values
2642 
2643    Output Parameter:
2644 .   mat - the matrix

2645    Level: intermediate
2646 
2647    Notes:
2648        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2649      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2650      called this routine.
2651 
2652        The i and j indices are 0 based
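
   A minimal calling sketch, assuming a communicator comm, a hypothetical square matrix of
   global size N, a Mat A to be created, and a process that owns two rows passed in CSR form
   (j holds global column indices, sorted within each row):
.vb
     PetscInt    i[] = {0,2,3};
     PetscInt    j[] = {0,1,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,&A);CHKERRQ(ierr);
.ve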
2653 
2654 .keywords: matrix, aij, compressed row, sparse, parallel
2655 
2656 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2657           MPIAIJ, MatCreateMPIAIJ()
2658 @*/
2659 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2660 {
2661   PetscErrorCode ierr;
2662 
2663   PetscFunctionBegin;
2664   if (i[0]) {
2665     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2666   }
2667   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2668   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
       ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2669   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
2670   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatCreateMPIAIJ"
2676 /*@C
2677    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2678    (the default parallel PETSc format).  For good matrix assembly performance
2679    the user should preallocate the matrix storage by setting the parameters
2680    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2681    performance can be increased by more than a factor of 50.
2682 
2683    Collective on MPI_Comm
2684 
2685    Input Parameters:
2686 +  comm - MPI communicator
2687 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2688            This value should be the same as the local size used in creating the
2689            y vector for the matrix-vector product y = Ax.
2690 .  n - This value should be the same as the local size used in creating the
2691        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2692        calculated if N is given) For square matrices n is almost always m.
2693 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2694 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2695 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2696            (same value is used for all local rows)
2697 .  d_nnz - array containing the number of nonzeros in the various rows of the
2698            DIAGONAL portion of the local submatrix (possibly different for each row)
2699            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2700            The size of this array is equal to the number of local rows, i.e 'm'.
2701            You must leave room for the diagonal entry even if it is zero.
2702 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2703            submatrix (same value is used for all local rows).
2704 -  o_nnz - array containing the number of nonzeros in the various rows of the
2705            OFF-DIAGONAL portion of the local submatrix (possibly different for
2706            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2707            structure. The size of this array is equal to the number
2708            of local rows, i.e 'm'.
2709 
2710    Output Parameter:
2711 .  A - the matrix
2712 
2713    Notes:
2714    If the *_nnz parameter is given then the *_nz parameter is ignored
2715 
2716    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2717    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2718    storage requirements for this matrix.
2719 
2720    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2721    processor than it must be used on all processors that share the object for
2722    that argument.
2723 
2724    The user MUST specify either the local or global matrix dimensions
2725    (possibly both).
2726 
2727    The parallel matrix is partitioned such that the first m0 rows belong to
2728    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2729    to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on each process.
2730 
2731    The DIAGONAL portion of the local submatrix of a processor can be defined
2732    as the submatrix obtained by extracting the part corresponding
2733    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2734    first row that belongs to the processor, and r2 is the last row belonging
2735    to this processor. This is a square mxm matrix. The remaining portion
2736    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2737 
2738    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2739 
2740    When calling this routine with a single process communicator, a matrix of
2741    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2742    type of communicator, use the construction mechanism:
2743      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2744 
2745    By default, this format uses inodes (identical nodes) when possible.
2746    We search for consecutive rows with the same nonzero structure, thereby
2747    reusing matrix information to achieve increased efficiency.
2748 
2749    Options Database Keys:
2750 +  -mat_no_inode  - Do not use inodes
2751 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2752 -  -mat_aij_oneindex - Internally use indexing starting at 1
2753         rather than 0.  Note that when calling MatSetValues(),
2754         the user still MUST index entries starting at 0!
2755 
2756 
2757    Example usage:
2758 
2759    Consider the following 8x8 matrix with 34 non-zero values, that is
2760    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2761    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2762    as follows:
2763 
2764 .vb
2765             1  2  0  |  0  3  0  |  0  4
2766     Proc0   0  5  6  |  7  0  0  |  8  0
2767             9  0 10  | 11  0  0  | 12  0
2768     -------------------------------------
2769            13  0 14  | 15 16 17  |  0  0
2770     Proc1   0 18  0  | 19 20 21  |  0  0
2771             0  0  0  | 22 23  0  | 24  0
2772     -------------------------------------
2773     Proc2  25 26 27  |  0  0 28  | 29  0
2774            30  0  0  | 31 32 33  |  0 34
2775 .ve
2776 
2777    This can be represented as a collection of submatrices as:
2778 
2779 .vb
2780       A B C
2781       D E F
2782       G H I
2783 .ve
2784 
2785    Where the submatrices A,B,C are owned by proc0, D,E,F are
2786    owned by proc1, G,H,I are owned by proc2.
2787 
2788    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2789    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2790    The 'M','N' parameters are 8,8, and have the same values on all procs.
2791 
2792    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2793    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2794    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2795    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2796    part as separate SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2797    matrix, and [DF] as another SeqAIJ matrix.
2798 
2799    When d_nz, o_nz parameters are specified, d_nz storage elements are
2800    allocated for every row of the local DIAGONAL submatrix, and o_nz
2801    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2802    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2803    per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2804    In this case, the values of d_nz,o_nz are:
2805 .vb
2806      proc0 : d_nz = 2, o_nz = 2
2807      proc1 : d_nz = 3, o_nz = 2
2808      proc2 : d_nz = 1, o_nz = 4
2809 .ve
2810    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2811    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2812    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2813    34 values.
2814 
2815    When d_nnz, o_nnz parameters are specified, the storage is specified
2816    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2817    In the above case the values for d_nnz,o_nnz are:
2818 .vb
2819      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2820      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2821      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2822 .ve
2823    Here the space allocated is the sum of all the above values, i.e. 34, and
2824    hence the preallocation is perfect.
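
   As a concrete illustration, a minimal calling sequence for the example above,
   sketched from proc1's point of view (the array values are exactly the d_nnz/o_nnz
   entries listed above; error handling follows the usual ierr/CHKERRQ style):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     /* m=3 local rows, n=3 local columns, M=N=8 global; the d_nz/o_nz arguments
        (here 0) are ignored because d_nnz/o_nnz are provided */
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve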
2825 
2826    Level: intermediate
2827 
2828 .keywords: matrix, aij, compressed row, sparse, parallel
2829 
2830 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2831           MPIAIJ, MatCreateMPIAIJWithArrays()
2832 @*/
2833 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2834 {
2835   PetscErrorCode ierr;
2836   PetscMPIInt    size;
2837 
2838   PetscFunctionBegin;
2839   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2840   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2841   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2842   if (size > 1) {
2843     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2844     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2845   } else {
2846     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2847     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2848   }
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 #undef __FUNCT__
2853 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2854 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2855 {
2856   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2857 
2858   PetscFunctionBegin;
2859   *Ad     = a->A;
2860   *Ao     = a->B;
2861   *colmap = a->garray;
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 #undef __FUNCT__
2866 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2867 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2868 {
2869   PetscErrorCode ierr;
2870   PetscInt       i;
2871   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2872 
2873   PetscFunctionBegin;
2874   if (coloring->ctype == IS_COLORING_LOCAL) {
2875     ISColoringValue *allcolors,*colors;
2876     ISColoring      ocoloring;
2877 
2878     /* set coloring for diagonal portion */
2879     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2880 
2881     /* set coloring for off-diagonal portion */
2882     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2883     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2884     for (i=0; i<a->B->cmap.n; i++) {
2885       colors[i] = allcolors[a->garray[i]];
2886     }
2887     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2888     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2889     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2890     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2891   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2892     ISColoringValue *colors;
2893     PetscInt        *larray;
2894     ISColoring      ocoloring;
2895 
2896     /* set coloring for diagonal portion */
2897     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2898     for (i=0; i<a->A->cmap.n; i++) {
2899       larray[i] = i + A->cmap.rstart;
2900     }
2901     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2902     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2903     for (i=0; i<a->A->cmap.n; i++) {
2904       colors[i] = coloring->colors[larray[i]];
2905     }
2906     ierr = PetscFree(larray);CHKERRQ(ierr);
2907     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2908     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2909     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2910 
2911     /* set coloring for off-diagonal portion */
2912     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2913     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2914     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2915     for (i=0; i<a->B->cmap.n; i++) {
2916       colors[i] = coloring->colors[larray[i]];
2917     }
2918     ierr = PetscFree(larray);CHKERRQ(ierr);
2919     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2920     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2921     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2922   } else {
2923     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2924   }
2925 
2926   PetscFunctionReturn(0);
2927 }
2928 
2929 #if defined(PETSC_HAVE_ADIC)
2930 #undef __FUNCT__
2931 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2932 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2933 {
2934   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2935   PetscErrorCode ierr;
2936 
2937   PetscFunctionBegin;
2938   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2939   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2940   PetscFunctionReturn(0);
2941 }
2942 #endif
2943 
2944 #undef __FUNCT__
2945 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2946 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2947 {
2948   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2949   PetscErrorCode ierr;
2950 
2951   PetscFunctionBegin;
2952   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2953   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2954   PetscFunctionReturn(0);
2955 }
2956 
2957 #undef __FUNCT__
2958 #define __FUNCT__ "MatMerge"
2959 /*@C
2960       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2961                  matrices from each processor
2962 
2963     Collective on MPI_Comm
2964 
2965    Input Parameters:
2966 +    comm - the communicator the parallel matrix will live on
2967 .    inmat - the input sequential matrix (each process provides its own)
2968 .    n - number of local columns (or PETSC_DECIDE)
2969 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2970 
2971    Output Parameter:
2972 .    outmat - the parallel matrix generated
2973 
2974     Level: advanced
2975 
2976    Notes: The number of columns of the matrix on EACH processor MUST be the same. The input matrix inmat is destroyed by this routine once its entries have been merged into outmat.
2977 
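   A minimal usage sketch (Aseq here stands for a fully assembled sequential AIJ matrix
   owned by each process; note that inmat is destroyed inside MatMerge()):

.vb
     Mat Apar;
     ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
.ve
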
2978 @*/
2979 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2980 {
2981   PetscErrorCode ierr;
2982   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
2983   PetscInt       *indx;
2984   PetscScalar    *values;
2985 
2986   PetscFunctionBegin;
2987   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2988   if (scall == MAT_INITIAL_MATRIX){
2989     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2990     if (n == PETSC_DECIDE){
2991       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2992     }
2993     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2994     rstart -= m;
2995 
2996     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2997     for (i=0;i<m;i++) {
2998       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2999       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3000       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3001     }
3002     /* This routine will ONLY return MPIAIJ type matrix */
3003     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3004     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3005     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3006     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3007     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3008 
3009   } else if (scall == MAT_REUSE_MATRIX){
3010     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3011   } else {
3012     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3013   }
3014 
3015   for (i=0;i<m;i++) {
3016     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3017     Ii    = i + rstart;
3018     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3019     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3020   }
3021   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3022   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3023   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3024 
3025   PetscFunctionReturn(0);
3026 }
3027 
3028 #undef __FUNCT__
3029 #define __FUNCT__ "MatFileSplit"
3030 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3031 {
3032   PetscErrorCode    ierr;
3033   PetscMPIInt       rank;
3034   PetscInt          m,N,i,rstart,nnz;
3035   size_t            len;
3036   const PetscInt    *indx;
3037   PetscViewer       out;
3038   char              *name;
3039   Mat               B;
3040   const PetscScalar *values;
3041 
3042   PetscFunctionBegin;
3043   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3044   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3045   /* Should this be the type of the diagonal block of A? */
3046   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3047   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3048   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3049   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3050   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3051   for (i=0;i<m;i++) {
3052     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3053     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3054     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3055   }
3056   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3057   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3058 
3059   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3060   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3061   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3062   sprintf(name,"%s.%d",outfile,rank);
3063   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3064   ierr = PetscFree(name);CHKERRQ(ierr);
3065   ierr = MatView(B,out);CHKERRQ(ierr);
3066   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3067   ierr = MatDestroy(B);CHKERRQ(ierr);
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3074 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3075 {
3076   PetscErrorCode       ierr;
3077   Mat_Merge_SeqsToMPI  *merge;
3078   PetscObjectContainer container;
3079 
3080   PetscFunctionBegin;
3081   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3082   if (container) {
3083     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3084     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3085     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3086     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3087     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3088     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3089     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3090     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3091     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3092     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3093     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3094     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3095 
3096     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
3097     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3098     ierr = PetscFree(merge);CHKERRQ(ierr);
3099   }
3100 
3101   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3102   PetscFunctionReturn(0);
3103 }
3104 
3105 #include "src/mat/utils/freespace.h"
3106 #include "petscbt.h"
3107 static PetscEvent logkey_seqstompinum = 0;
3108 #undef __FUNCT__
3109 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3110 /*@C
3111       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3112                  matrices from each processor
3113 
3114     Collective on MPI_Comm
3115 
3116    Input Parameters:
3117 +    comm - the communicator the parallel matrix will live on
3118 .    seqmat - the input sequential matrix (each process provides its own)
3119 .    m - number of local rows (or PETSC_DECIDE)
3120 .    n - number of local columns (or PETSC_DECIDE)
3121 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3122 
3123    Output Parameter:
3124 .    mpimat - the parallel matrix generated
3125 
3126     Level: advanced
3127 
3128    Notes:
3129      The dimensions of the sequential matrix in each processor MUST be the same.
3130      The input seqmat is stored inside the container "Mat_Merge_SeqsToMPI", and will be
3131      destroyed when mpimat is destroyed. Use PetscObjectQuery() to gain access to seqmat.
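
     A minimal usage sketch (seqmat stands for an assembled SeqAIJ matrix on each process;
     the symbolic phase is performed only when scall is MAT_INITIAL_MATRIX):

.vb
     Mat mpimat;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve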
3132 @*/
3133 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3134 {
3135   PetscErrorCode       ierr;
3136   MPI_Comm             comm=mpimat->comm;
3137   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3138   PetscMPIInt          size,rank,taga,*len_s;
3139   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3140   PetscInt             proc,m;
3141   PetscInt             **buf_ri,**buf_rj;
3142   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3143   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3144   MPI_Request          *s_waits,*r_waits;
3145   MPI_Status           *status;
3146   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3147   Mat_Merge_SeqsToMPI  *merge;
3148   PetscObjectContainer container;
3149 
3150   PetscFunctionBegin;
3151   if (!logkey_seqstompinum) {
3152     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3153   }
3154   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3155 
3156   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3157   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3158 
3159   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3160   if (container) {
3161     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3162   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat does not have an attached MatMergeSeqsToMPI container");
3163   bi     = merge->bi;
3164   bj     = merge->bj;
3165   buf_ri = merge->buf_ri;
3166   buf_rj = merge->buf_rj;
3167 
3168   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3169   owners = merge->rowmap.range;
3170   len_s  = merge->len_s;
3171 
3172   /* send and recv matrix values */
3173   /*-----------------------------*/
3174   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3175   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3176 
3177   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3178   for (proc=0,k=0; proc<size; proc++){
3179     if (!len_s[proc]) continue;
3180     i = owners[proc];
3181     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3182     k++;
3183   }
3184 
3185   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3186   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3187   ierr = PetscFree(status);CHKERRQ(ierr);
3188 
3189   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3190   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3191 
3192   /* insert mat values of mpimat */
3193   /*----------------------------*/
3194   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3195   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3196   nextrow = buf_ri_k + merge->nrecv;
3197   nextai  = nextrow + merge->nrecv;
3198 
3199   for (k=0; k<merge->nrecv; k++){
3200     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3201     nrows = *(buf_ri_k[k]);
3202     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3203     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3204   }
3205 
3206   /* set values of ba */
3207   m = merge->rowmap.n;
3208   for (i=0; i<m; i++) {
3209     arow = owners[rank] + i;
3210     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3211     bnzi = bi[i+1] - bi[i];
3212     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3213 
3214     /* add local non-zero vals of this proc's seqmat into ba */
3215     anzi = ai[arow+1] - ai[arow];
3216     aj   = a->j + ai[arow];
3217     aa   = a->a + ai[arow];
3218     nextaj = 0;
3219     for (j=0; nextaj<anzi; j++){
3220       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3221         ba_i[j] += aa[nextaj++];
3222       }
3223     }
3224 
3225     /* add received vals into ba */
3226     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3227       /* i-th row */
3228       if (i == *nextrow[k]) {
3229         anzi = *(nextai[k]+1) - *nextai[k];
3230         aj   = buf_rj[k] + *(nextai[k]);
3231         aa   = abuf_r[k] + *(nextai[k]);
3232         nextaj = 0;
3233         for (j=0; nextaj<anzi; j++){
3234           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3235             ba_i[j] += aa[nextaj++];
3236           }
3237         }
3238         nextrow[k]++; nextai[k]++;
3239       }
3240     }
3241     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3242   }
3243   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3244   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3245 
3246   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3247   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3248   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3249   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 static PetscEvent logkey_seqstompisym = 0;
3254 #undef __FUNCT__
3255 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3256 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3257 {
3258   PetscErrorCode       ierr;
3259   Mat                  B_mpi;
3260   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3261   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3262   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3263   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3264   PetscInt             len,proc,*dnz,*onz;
3265   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3266   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3267   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3268   MPI_Status           *status;
3269   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3270   PetscBT              lnkbt;
3271   Mat_Merge_SeqsToMPI  *merge;
3272   PetscObjectContainer container;
3273 
3274   PetscFunctionBegin;
3275   if (!logkey_seqstompisym) {
3276     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3277   }
3278   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3279 
3280   /* make sure it is a PETSc comm */
3281   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3282   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3283   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3284 
3285   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3286   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3287 
3288   /* determine row ownership */
3289   /*---------------------------------------------------------*/
3290   merge->rowmap.n = m;
3291   merge->rowmap.N = M;
3292   merge->rowmap.bs = 1;
3293   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3294   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3295   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3296 
3297   m      = merge->rowmap.n;
3298   M      = merge->rowmap.N;
3299   owners = merge->rowmap.range;
3300 
3301   /* determine the number of messages to send, their lengths */
3302   /*---------------------------------------------------------*/
3303   len_s  = merge->len_s;
3304 
3305   len = 0;  /* length of buf_si[] */
3306   merge->nsend = 0;
3307   for (proc=0; proc<size; proc++){
3308     len_si[proc] = 0;
3309     if (proc == rank){
3310       len_s[proc] = 0;
3311     } else {
3312       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3313       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3314     }
3315     if (len_s[proc]) {
3316       merge->nsend++;
3317       nrows = 0;
3318       for (i=owners[proc]; i<owners[proc+1]; i++){
3319         if (ai[i+1] > ai[i]) nrows++;
3320       }
3321       len_si[proc] = 2*(nrows+1);
3322       len += len_si[proc];
3323     }
3324   }
3325 
3326   /* determine the number and length of messages to receive for ij-structure */
3327   /*-------------------------------------------------------------------------*/
3328   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3329   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3330 
3331   /* post the Irecv of j-structure */
3332   /*-------------------------------*/
3333   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3334   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3335 
3336   /* post the Isend of j-structure */
3337   /*--------------------------------*/
3338   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3339   sj_waits = si_waits + merge->nsend;
3340 
3341   for (proc=0, k=0; proc<size; proc++){
3342     if (!len_s[proc]) continue;
3343     i = owners[proc];
3344     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3345     k++;
3346   }
3347 
3348   /* receives and sends of j-structure are complete */
3349   /*------------------------------------------------*/
3350   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3351   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3352 
3353   /* send and recv i-structure */
3354   /*---------------------------*/
3355   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3356   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3357 
3358   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3359   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3360   for (proc=0,k=0; proc<size; proc++){
3361     if (!len_s[proc]) continue;
3362     /* form outgoing message for i-structure:
3363          buf_si[0]:                 nrows to be sent
3364                [1:nrows]:           row index (global)
3365                [nrows+1:2*nrows+1]: i-structure index
3366     */
3367     /*-------------------------------------------*/
3368     nrows = len_si[proc]/2 - 1;
3369     buf_si_i    = buf_si + nrows+1;
3370     buf_si[0]   = nrows;
3371     buf_si_i[0] = 0;
3372     nrows = 0;
3373     for (i=owners[proc]; i<owners[proc+1]; i++){
3374       anzi = ai[i+1] - ai[i];
3375       if (anzi) {
3376         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3377         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3378         nrows++;
3379       }
3380     }
3381     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3382     k++;
3383     buf_si += len_si[proc];
3384   }
3385 
3386   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3387   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3388 
3389   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3390   for (i=0; i<merge->nrecv; i++){
3391     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3392   }
3393 
3394   ierr = PetscFree(len_si);CHKERRQ(ierr);
3395   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3396   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3397   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3398   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3399   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3400   ierr = PetscFree(status);CHKERRQ(ierr);
3401 
3402   /* compute a local seq matrix in each processor */
3403   /*----------------------------------------------*/
3404   /* allocate bi array and free space for accumulating nonzero column info */
3405   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3406   bi[0] = 0;
3407 
3408   /* create and initialize a linked list */
3409   nlnk = N+1;
3410   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3411 
3412   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3413   len = 0;
3414   len  = ai[owners[rank+1]] - ai[owners[rank]];
3415   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3416   current_space = free_space;
3417 
3418   /* determine symbolic info for each local row */
3419   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3420   nextrow = buf_ri_k + merge->nrecv;
3421   nextai  = nextrow + merge->nrecv;
3422   for (k=0; k<merge->nrecv; k++){
3423     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3424     nrows = *buf_ri_k[k];
3425     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3426     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3427   }
3428 
3429   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3430   len = 0;
3431   for (i=0;i<m;i++) {
3432     bnzi   = 0;
3433     /* add local non-zero cols of this proc's seqmat into lnk */
3434     arow   = owners[rank] + i;
3435     anzi   = ai[arow+1] - ai[arow];
3436     aj     = a->j + ai[arow];
3437     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3438     bnzi += nlnk;
3439     /* add received col data into lnk */
3440     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3441       if (i == *nextrow[k]) { /* i-th row */
3442         anzi = *(nextai[k]+1) - *nextai[k];
3443         aj   = buf_rj[k] + *nextai[k];
3444         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3445         bnzi += nlnk;
3446         nextrow[k]++; nextai[k]++;
3447       }
3448     }
3449     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3450 
3451     /* if free space is not available, make more free space */
3452     if (current_space->local_remaining<bnzi) {
3453       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3454       nspacedouble++;
3455     }
3456     /* copy data into free space, then initialize lnk */
3457     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3458     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3459 
3460     current_space->array           += bnzi;
3461     current_space->local_used      += bnzi;
3462     current_space->local_remaining -= bnzi;
3463 
3464     bi[i+1] = bi[i] + bnzi;
3465   }
3466 
3467   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3468 
3469   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3470   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3471   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3472 
3473   /* create symbolic parallel matrix B_mpi */
3474   /*---------------------------------------*/
3475   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3476   if (n==PETSC_DECIDE) {
3477     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3478   } else {
3479     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3480   }
3481   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3482   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3483   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3484 
3485   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3486   B_mpi->assembled     = PETSC_FALSE;
3487   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3488   merge->bi            = bi;
3489   merge->bj            = bj;
3490   merge->buf_ri        = buf_ri;
3491   merge->buf_rj        = buf_rj;
3492   merge->coi           = PETSC_NULL;
3493   merge->coj           = PETSC_NULL;
3494   merge->owners_co     = PETSC_NULL;
3495 
3496   /* attach the supporting struct to B_mpi for reuse */
3497   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3498   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3499   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3500   *mpimat = B_mpi;
3501 
3502   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3503   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3504   PetscFunctionReturn(0);
3505 }
3506 
3507 static PetscEvent logkey_seqstompi = 0;
3508 #undef __FUNCT__
3509 #define __FUNCT__ "MatMerge_SeqsToMPI"
3510 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3511 {
3512   PetscErrorCode   ierr;
3513 
3514   PetscFunctionBegin;
3515   if (!logkey_seqstompi) {
3516     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3517   }
3518   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3519   if (scall == MAT_INITIAL_MATRIX){
3520     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3521   }
3522   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3523   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3524   PetscFunctionReturn(0);
3525 }
3526 static PetscEvent logkey_getlocalmat = 0;
3527 #undef __FUNCT__
3528 #define __FUNCT__ "MatGetLocalMat"
3529 /*@C
3530      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of a parallel matrix
3531 
3532     Not Collective
3533 
3534    Input Parameters:
3535 +    A - the matrix
3536 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3537 
3538    Output Parameter:
3539 .    A_loc - the local sequential matrix generated
3540 
3541     Level: developer
3542 
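   A minimal usage sketch (A stands for an assembled MPIAIJ matrix; the MAT_REUSE_MATRIX
   call only refreshes the numerical values of the previously created local matrix,
   assuming the nonzero pattern of A is unchanged):

.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... A's values change, but not its nonzero pattern ... */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
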
3543 @*/
3544 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3545 {
3546   PetscErrorCode  ierr;
3547   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3548   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3549   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3550   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3551   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3552   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3553 
3554   PetscFunctionBegin;
3555   if (!logkey_getlocalmat) {
3556     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3557   }
3558   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3559   if (scall == MAT_INITIAL_MATRIX){
3560     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3561     ci[0] = 0;
3562     for (i=0; i<am; i++){
3563       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3564     }
3565     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3566     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3567     k = 0;
3568     for (i=0; i<am; i++) {
3569       ncols_o = bi[i+1] - bi[i];
3570       ncols_d = ai[i+1] - ai[i];
3571       /* off-diagonal portion of A */
3572       for (jo=0; jo<ncols_o; jo++) {
3573         col = cmap[*bj];
3574         if (col >= cstart) break;
3575         cj[k]   = col; bj++;
3576         ca[k++] = *ba++;
3577       }
3578       /* diagonal portion of A */
3579       for (j=0; j<ncols_d; j++) {
3580         cj[k]   = cstart + *aj++;
3581         ca[k++] = *aa++;
3582       }
3583       /* off-diagonal portion of A */
3584       for (j=jo; j<ncols_o; j++) {
3585         cj[k]   = cmap[*bj++];
3586         ca[k++] = *ba++;
3587       }
3588     }
3589     /* put together the new matrix */
3590     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3591     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3592     /* Since these are PETSc arrays, change flags to free them as necessary. */
3593     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3594     mat->free_a  = PETSC_TRUE;
3595     mat->free_ij = PETSC_TRUE;
3596     mat->nonew   = 0;
3597   } else if (scall == MAT_REUSE_MATRIX){
3598     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3599     ci = mat->i; cj = mat->j; ca = mat->a;
3600     for (i=0; i<am; i++) {
3601       /* off-diagonal portion of A */
3602       ncols_o = bi[i+1] - bi[i];
3603       for (jo=0; jo<ncols_o; jo++) {
3604         col = cmap[*bj];
3605         if (col >= cstart) break;
3606         *ca++ = *ba++; bj++;
3607       }
3608       /* diagonal portion of A */
3609       ncols_d = ai[i+1] - ai[i];
3610       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3611       /* off-diagonal portion of A */
3612       for (j=jo; j<ncols_o; j++) {
3613         *ca++ = *ba++; bj++;
3614       }
3615     }
3616   } else {
3617     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3618   }
3619 
3620   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3621   PetscFunctionReturn(0);
3622 }
3623 
3624 static PetscEvent logkey_getlocalmatcondensed = 0;
3625 #undef __FUNCT__
3626 #define __FUNCT__ "MatGetLocalMatCondensed"
3627 /*@C
3628      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and NON-ZERO columns of a parallel matrix
3629 
3630     Not Collective
3631 
3632    Input Parameters:
3633 +    A - the matrix
3634 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3635 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3636 
3637    Output Parameter:
3638 .    A_loc - the local sequential matrix generated
3639 
3640     Level: developer
3641 
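   A minimal usage sketch (A stands for an assembled MPIAIJ matrix; passing PETSC_NULL
   for row and col lets the routine take all local rows and all nonzero columns):

.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
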
3642 @*/
3643 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3644 {
3645   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3646   PetscErrorCode    ierr;
3647   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3648   IS                isrowa,iscola;
3649   Mat               *aloc;
3650 
3651   PetscFunctionBegin;
3652   if (!logkey_getlocalmatcondensed) {
3653     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3654   }
3655   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3656   if (!row){
3657     start = A->rmap.rstart; end = A->rmap.rend;
3658     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3659   } else {
3660     isrowa = *row;
3661   }
3662   if (!col){
3663     start = A->cmap.rstart;
3664     cmap  = a->garray;
3665     nzA   = a->A->cmap.n;
3666     nzB   = a->B->cmap.n;
3667     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3668     ncols = 0;
3669     for (i=0; i<nzB; i++) {
3670       if (cmap[i] < start) idx[ncols++] = cmap[i];
3671       else break;
3672     }
3673     imark = i;
3674     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3675     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3676     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3677     ierr = PetscFree(idx);CHKERRQ(ierr);
3678   } else {
3679     iscola = *col;
3680   }
3681   if (scall != MAT_INITIAL_MATRIX){
3682     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3683     aloc[0] = *A_loc;
3684   }
3685   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3686   *A_loc = aloc[0];
3687   ierr = PetscFree(aloc);CHKERRQ(ierr);
3688   if (!row){
3689     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3690   }
3691   if (!col){
3692     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3693   }
3694   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3695   PetscFunctionReturn(0);
3696 }
3697 
3698 static PetscEvent logkey_GetBrowsOfAcols = 0;
3699 #undef __FUNCT__
3700 #define __FUNCT__ "MatGetBrowsOfAcols"
3701 /*@C
3702     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3703 
3704     Collective on Mat
3705 
3706    Input Parameters:
3707 +    A,B - the matrices in mpiaij format
3708 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3709 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3710 
3711    Output Parameter:
3712 +    rowb, colb - index sets of rows and columns of B to extract
3713 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
3714 -    B_seq - the sequential matrix generated
3715 
3716     Level: developer
3717 
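   A minimal usage sketch (A and B stand for assembled MPIAIJ matrices whose local
   dimensions are compatible; on the first call the routine creates the index sets,
   which can then be passed back with MAT_REUSE_MATRIX):

.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
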
3718 @*/
3719 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3720 {
3721   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3722   PetscErrorCode    ierr;
3723   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3724   IS                isrowb,iscolb;
3725   Mat               *bseq;
3726 
3727   PetscFunctionBegin;
3728   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3729     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3730   }
3731   if (!logkey_GetBrowsOfAcols) {
3732     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3733   }
3734   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3735 
3736   if (scall == MAT_INITIAL_MATRIX){
3737     start = A->cmap.rstart;
3738     cmap  = a->garray;
3739     nzA   = a->A->cmap.n;
3740     nzB   = a->B->cmap.n;
3741     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3742     ncols = 0;
3743     for (i=0; i<nzB; i++) {  /* row < local row index */
3744       if (cmap[i] < start) idx[ncols++] = cmap[i];
3745       else break;
3746     }
3747     imark = i;
3748     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3749     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3750     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3751     ierr = PetscFree(idx);CHKERRQ(ierr);
3752     *brstart = imark;
3753     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3754   } else {
3755     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3756     isrowb = *rowb; iscolb = *colb;
3757     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3758     bseq[0] = *B_seq;
3759   }
3760   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3761   *B_seq = bseq[0];
3762   ierr = PetscFree(bseq);CHKERRQ(ierr);
3763   if (!rowb){
3764     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3765   } else {
3766     *rowb = isrowb;
3767   }
3768   if (!colb){
3769     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3770   } else {
3771     *colb = iscolb;
3772   }
3773   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3774   PetscFunctionReturn(0);
3775 }
3776 
3777 static PetscEvent logkey_GetBrowsOfAocols = 0;
3778 #undef __FUNCT__
3779 #define __FUNCT__ "MatGetBrowsOfAoCols"
3780 /*@C
3781     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3782     of the OFF-DIAGONAL portion of local A
3783 
3784     Collective on Mat
3785 
3786    Input Parameters:
3787 +    A,B - the matrices in mpiaij format
3788 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3789 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3790 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3791 
3792    Output Parameter:
3793 .    B_oth - the sequential matrix generated
3794 
3795     Level: developer
3796 
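   A minimal usage sketch (A and B stand for assembled MPIAIJ matrices whose local
   dimensions are compatible; startsj and bufa are filled by the routine so they can be
   passed back with MAT_REUSE_MATRIX):

.vb
     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve
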
3797 @*/
3798 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3799 {
3800   VecScatter_MPI_General *gen_to,*gen_from;
3801   PetscErrorCode         ierr;
3802   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3803   Mat_SeqAIJ             *b_oth;
3804   VecScatter             ctx=a->Mvctx;
3805   MPI_Comm               comm=ctx->comm;
3806   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3807   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3808   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3809   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3810   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
3811   MPI_Status             *sstatus,rstatus;
3812   PetscInt               *cols;
3813   PetscScalar            *vals;
3814   PetscMPIInt            j;
3815 
3816   PetscFunctionBegin;
3817   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3818     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3819   }
3820   if (!logkey_GetBrowsOfAocols) {
3821     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3822   }
3823   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3824   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3825 
3826   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3827   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3828   rvalues  = gen_from->values; /* buffer that receives the row lengths sent by other processes */
3829   svalues  = gen_to->values;   /* buffer holding the row lengths this process sends */
3830   nrecvs   = gen_from->n;
3831   nsends   = gen_to->n;
3832 
3833   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
3834   srow     = gen_to->indices;   /* local row index to be sent */
3835   rstarts  = gen_from->starts;
3836   sstarts  = gen_to->starts;
3837   rprocs   = gen_from->procs;
3838   sprocs   = gen_to->procs;
3839   sstatus  = gen_to->sstatus;
3840 
3841   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3842   if (scall == MAT_INITIAL_MATRIX){
3843     /* i-array */
3844     /*---------*/
3845     /*  post receives */
3846     for (i=0; i<nrecvs; i++){
3847       rowlen = (PetscInt*)rvalues + rstarts[i];
3848       nrows = rstarts[i+1]-rstarts[i];
3849       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3850     }
3851 
3852     /* pack the outgoing message */
3853     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3854     rstartsj = sstartsj + nsends +1;
3855     sstartsj[0] = 0;  rstartsj[0] = 0;
3856     len = 0; /* total length of j or a array to be sent */
3857     k = 0;
3858     for (i=0; i<nsends; i++){
3859       rowlen = (PetscInt*)svalues + sstarts[i];
3860       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3861       for (j=0; j<nrows; j++) {
3862         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3863         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3864         len += rowlen[j];
3865         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3866         k++;
3867       }
3868       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3869       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3870     }
3871     /* recvs and sends of i-array are completed */
3872     i = nrecvs;
3873     while (i--) {
3874       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3875     }
3876     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3877     /* allocate buffers for sending j and a arrays */
3878     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3879     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3880 
3881     /* create i-array of B_oth */
3882     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3883     b_othi[0] = 0;
3884     len = 0; /* total length of j or a array to be received */
3885     k = 0;
3886     for (i=0; i<nrecvs; i++){
3887       rowlen = (PetscInt*)rvalues + rstarts[i];
3888       nrows = rstarts[i+1]-rstarts[i];
3889       for (j=0; j<nrows; j++) {
3890         b_othi[k+1] = b_othi[k] + rowlen[j];
3891         len += rowlen[j]; k++;
3892       }
3893       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3894     }
3895 
3896     /* allocate space for j and a arrays of B_oth */
3897     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3898     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3899 
3900     /* j-array */
3901     /*---------*/
3902     /*  post receives of j-array */
3903     for (i=0; i<nrecvs; i++){
3904       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3905       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3906     }
3907     k = 0;
3908     for (i=0; i<nsends; i++){
3909       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3910       bufJ = bufj+sstartsj[i];
3911       for (j=0; j<nrows; j++) {
3912         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3913         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3914         for (l=0; l<ncols; l++){
3915           *bufJ++ = cols[l];
3916         }
3917         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3918       }
3919       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3920     }
3921 
3922     /* recvs and sends of j-array are completed */
3923     i = nrecvs;
3924     while (i--) {
3925       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3926     }
3927     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3928   } else if (scall == MAT_REUSE_MATRIX){
3929     sstartsj = *startsj;
3930     rstartsj = sstartsj + nsends +1;
3931     bufa     = *bufa_ptr;
3932     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3933     b_otha   = b_oth->a;
3934   } else {
3935     SETERRQ(PETSC_ERR_ARG_WRONG,"Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
3936   }
3937 
3938   /* a-array */
3939   /*---------*/
3940   /*  post receives of a-array */
3941   for (i=0; i<nrecvs; i++){
3942     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3943     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3944   }
3945   k = 0;
3946   for (i=0; i<nsends; i++){
3947     nrows = sstarts[i+1]-sstarts[i];
3948     bufA = bufa+sstartsj[i];
3949     for (j=0; j<nrows; j++) {
3950       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3951       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3952       for (l=0; l<ncols; l++){
3953         *bufA++ = vals[l];
3954       }
3955       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3956 
3957     }
3958     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3959   }
3960   /* recvs and sends of a-array are completed */
3961   i = nrecvs;
3962   while (i--) {
3963     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3964   }
3965   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3966   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
3967 
3968   if (scall == MAT_INITIAL_MATRIX){
3969     /* put together the new matrix */
3970     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3971 
3972     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3973     /* Since these are PETSc arrays, change flags to free them as necessary. */
3974     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
3975     b_oth->free_a  = PETSC_TRUE;
3976     b_oth->free_ij = PETSC_TRUE;
3977     b_oth->nonew   = 0;
3978 
3979     ierr = PetscFree(bufj);CHKERRQ(ierr);
3980     if (!startsj || !bufa_ptr){
3981       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3982       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3983     } else {
3984       *startsj  = sstartsj;
3985       *bufa_ptr = bufa;
3986     }
3987   }
3988   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3989 
3990   PetscFunctionReturn(0);
3991 }
3992 
3993 #undef __FUNCT__
3994 #define __FUNCT__ "MatGetCommunicationStructs"
3995 /*@C
3996   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3997 
3998   Not Collective
3999 
4000   Input Parameters:
4001 . A - The matrix in mpiaij format
4002 
4003   Output Parameter:
4004 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4005 . colmap - A map from global column index to local index into lvec
4006 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4007 
4008   Level: developer
4009 
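  A minimal usage sketch (A stands for an assembled MPIAIJ matrix; the type of colmap
  depends on whether PETSc was configured with PETSC_USE_CTABLE):

.vb
     Vec        lvec;
     PetscInt   *colmap;   /* PetscTable colmap;  when PETSC_USE_CTABLE is defined */
     VecScatter scat;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
.ve
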
4010 @*/
4011 #if defined (PETSC_USE_CTABLE)
4012 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4013 #else
4014 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4015 #endif
4016 {
4017   Mat_MPIAIJ *a;
4018 
4019   PetscFunctionBegin;
4020   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4021   PetscValidPointer(lvec, 2);
4022   PetscValidPointer(colmap, 3);
4023   PetscValidPointer(multScatter, 4);
4024   a = (Mat_MPIAIJ *) A->data;
4025   if (lvec) *lvec = a->lvec;
4026   if (colmap) *colmap = a->colmap;
4027   if (multScatter) *multScatter = a->Mvctx;
4028   PetscFunctionReturn(0);
4029 }
4030 
4031 EXTERN_C_BEGIN
4032 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4033 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4034 EXTERN_C_END
4035 
4036 /*MC
4037    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4038 
4039    Options Database Keys:
4040 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4041 
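   A minimal sketch of creating a matrix of this type through the generic interface
   (this mirrors what MatCreateMPIAIJ() does internally; A, m, n, M, N and the
   preallocation arguments are placeholders for the usual sizes and nonzero counts):

.vb
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
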
4042   Level: beginner
4043 
4044 .seealso: MatCreateMPIAIJ
4045 M*/
4046 
4047 EXTERN_C_BEGIN
4048 #undef __FUNCT__
4049 #define __FUNCT__ "MatCreate_MPIAIJ"
4050 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4051 {
4052   Mat_MPIAIJ     *b;
4053   PetscErrorCode ierr;
4054   PetscMPIInt    size;
4055 
4056   PetscFunctionBegin;
4057   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4058 
4059   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
4060   B->data         = (void*)b;
4061   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4062   B->factor       = 0;
4063   B->rmap.bs      = 1;
4064   B->assembled    = PETSC_FALSE;
4065   B->mapping      = 0;
4066 
4067   B->insertmode      = NOT_SET_VALUES;
4068   b->size            = size;
4069   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4070 
4071   /* build cache for off array entries formed */
4072   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4073   b->donotstash  = PETSC_FALSE;
4074   b->colmap      = 0;
4075   b->garray      = 0;
4076   b->roworiented = PETSC_TRUE;
4077 
4078   /* stuff used for matrix vector multiply */
4079   b->lvec      = PETSC_NULL;
4080   b->Mvctx     = PETSC_NULL;
4081 
4082   /* stuff for MatGetRow() */
4083   b->rowindices   = 0;
4084   b->rowvalues    = 0;
4085   b->getrowactive = PETSC_FALSE;
4086 
4087 
4088   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4089                                      "MatStoreValues_MPIAIJ",
4090                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4091   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4092                                      "MatRetrieveValues_MPIAIJ",
4093                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4094   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4095 				     "MatGetDiagonalBlock_MPIAIJ",
4096                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4097   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4098 				     "MatIsTranspose_MPIAIJ",
4099 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4100   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4101 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4102 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4103   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4104 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4105 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4106   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4107 				     "MatDiagonalScaleLocal_MPIAIJ",
4108 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4110                                      "MatConvert_MPIAIJ_MPICSRPERM",
4111                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4113                                      "MatConvert_MPIAIJ_MPICRL",
4114                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4115   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4116   PetscFunctionReturn(0);
4117 }
4118 EXTERN_C_END
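/*
   A sketch (added for illustration, not part of the original source) of how the function
   pointers composed in MatCreate_MPIAIJ() above are retrieved again later; the key
   "MatStoreValues_C" is one of the names registered above, and error handling is abbreviated.

     PetscErrorCode (*f)(Mat) = PETSC_NULL;
     ierr = PetscObjectQueryFunction((PetscObject)B,"MatStoreValues_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) { ierr = (*f)(B);CHKERRQ(ierr); }

   A non-null f then points at MatStoreValues_MPIAIJ(); this query-by-name lookup is roughly
   the mechanism the generic MatStoreValues() interface uses to dispatch to the type-specific
   implementation.
*/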
4119 
4120 /*
4121     Special version for direct calls from Fortran
4122 */
4123 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4124 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4125 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4126 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4127 #endif
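/* If neither branch applies, the symbol keeps its single trailing underscore
   (matsetvaluesmpiaij_), which is what most Fortran compilers append to external names;
   the two branches above cover the upper-case and no-underscore manglings. */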
4128 
4129 /* Redefine these macros so they can be used in a void function: the Fortran stub below cannot return an error code, so on error CHKERRABORT() aborts on the matrix communicator instead */
4130 #undef CHKERRQ
4131 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4132 #undef SETERRQ2
4133 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4134 #undef SETERRQ
4135 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4136 
4137 EXTERN_C_BEGIN
4138 #undef __FUNCT__
4139 #define __FUNCT__ "matsetvaluesmpiaij_"
4140 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4141 {
4142   Mat            mat = *mmat;
4143   PetscInt       m = *mm, n = *mn;
4144   InsertMode     addv = *maddv;
4145   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4146   PetscScalar    value;
4147   PetscErrorCode ierr;
4148 
4149   MatPreallocated(mat);
4150   if (mat->insertmode == NOT_SET_VALUES) {
4151     mat->insertmode = addv;
4152   }
4153 #if defined(PETSC_USE_DEBUG)
4154   else if (mat->insertmode != addv) {
4155     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4156   }
4157 #endif
4158   {
4159   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4160   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4161   PetscTruth     roworiented = aij->roworiented;
4162 
4163   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
4164   Mat            A = aij->A;
4165   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4166   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4167   PetscScalar    *aa = a->a;
4168   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4169   Mat            B = aij->B;
4170   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4171   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4172   PetscScalar    *ba = b->a;
4173 
4174   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4175   PetscInt       nonew = a->nonew;
4176   PetscScalar    *ap1,*ap2;
4177 
4178   PetscFunctionBegin;
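  /* For each row: locally owned rows are inserted directly, with columns inside the owned
     column range going into the diagonal block A and all other columns into the off-diagonal
     block B; rows owned by other processes are stashed and communicated at assembly time
     (unless donotstash is set). */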
4179   for (i=0; i<m; i++) {
4180     if (im[i] < 0) continue;
4181 #if defined(PETSC_USE_DEBUG)
4182     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4183 #endif
4184     if (im[i] >= rstart && im[i] < rend) {
4185       row      = im[i] - rstart;
4186       lastcol1 = -1;
4187       rp1      = aj + ai[row];
4188       ap1      = aa + ai[row];
4189       rmax1    = aimax[row];
4190       nrow1    = ailen[row];
4191       low1     = 0;
4192       high1    = nrow1;
4193       lastcol2 = -1;
4194       rp2      = bj + bi[row];
4195       ap2      = ba + bi[row];
4196       rmax2    = bimax[row];
4197       nrow2    = bilen[row];
4198       low2     = 0;
4199       high2    = nrow2;
4200 
4201       for (j=0; j<n; j++) {
4202         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4203         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4204         if (in[j] >= cstart && in[j] < cend){
4205           col = in[j] - cstart;
4206           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4207         } else if (in[j] < 0) continue;
4208 #if defined(PETSC_USE_DEBUG)
4209         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4210 #endif
4211         else {
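          /* Off-diagonal entry: if the matrix was assembled before, B stores a compressed set
             of columns, so the global column is translated through colmap (built on first use);
             a negative result means a brand new off-diagonal column, which forces
             DisAssemble_MPIAIJ() back to global column numbering below. */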
4212           if (mat->was_assembled) {
4213             if (!aij->colmap) {
4214               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4215             }
4216 #if defined (PETSC_USE_CTABLE)
4217             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4218             col--;
4219 #else
4220             col = aij->colmap[in[j]] - 1;
4221 #endif
4222             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4223               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4224               col =  in[j];
4225               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4226               B = aij->B;
4227               b = (Mat_SeqAIJ*)B->data;
4228               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4229               rp2      = bj + bi[row];
4230               ap2      = ba + bi[row];
4231               rmax2    = bimax[row];
4232               nrow2    = bilen[row];
4233               low2     = 0;
4234               high2    = nrow2;
4235               bm       = aij->B->rmap.n;
4236               ba = b->a;
4237             }
4238           } else col = in[j];
4239           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4240         }
4241       }
4242     } else {
4243       if (!aij->donotstash) {
4244         if (roworiented) {
4245           if (ignorezeroentries && v[i*n] == 0.0) continue;
4246           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4247         } else {
4248           if (ignorezeroentries && v[i] == 0.0) continue;
4249           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4250         }
4251       }
4252     }
4253   }}
4254   PetscFunctionReturnVoid();
4255 }
4256 EXTERN_C_END
4257