/* /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d32f9abdbc052d6e1fd06679b17a55415c3aae30) */
#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.
*/
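
/*
   Usage sketch (an illustration, not from this file; gmat must be a square
   MATSEQAIJ that exists on process 0, and m is this process's local row
   count):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   The second call reuses the structure of dist and moves over only the
   numerical values from process 0.
*/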
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap.n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap.n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap.n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table lookup cost; without it, it is not scalable
  (each process holds an order-N integer array) but access is fast.
*/
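
/*
   Illustration of the invariant the routine establishes (a note, not code
   from this file): colmap maps a global column gcol to "local off-diagonal
   index + 1", with 0 meaning "not present".  The array variant is then
   consumed as

     col = aij->colmap[gcol] - 1;
     if (col >= 0 && aij->garray[col] == gcol)  the column is local slot col
     else                                       gcol is not in this block
*/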
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE   15
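
/*
   The two macros below insert a single (row,col,value) into the diagonal (A)
   or off-diagonal (B) local block; CHUNKSIZE is the reallocation increment
   used when a row outgrows its preallocation.  Sketch of the search step,
   written with the A-block names (an illustration of the code below, not a
   separate implementation):

     if (col <= lastcol1) low1 = 0; else high1 = nrow1;    reuse last window
     while (high1-low1 > 5) {                              coarse binary search
       t = (low1+high1)/2;
       if (rp1[t] > col) high1 = t; else low1 = t;
     }
     a short linear scan then either updates ap1[_i] in place (ADD_VALUES
     adds, INSERT_VALUES overwrites) or shifts rp1/ap1 up one slot from the
     insertion point and stores col and value there.
*/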
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */
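  /* layout of v (implied by the three copies below): the values of the
     global row in increasing global-column order, i.e. off-diagonal entries
     left of the diagonal block, then the diagonal block, then the remaining
     off-diagonal entries */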

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
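
/*
   Typical call that lands in the routine above (a hedged sketch, not from
   this file; grow and gcol stand for hypothetical global indices):

     PetscInt    grow = ..., gcol = ...;
     PetscScalar v    = 1.0;
     ierr = MatSetValues(mat,1,&grow,1,&gcol,&v,ADD_VALUES);CHKERRQ(ierr);

   Rows owned by this process go directly into the A or B block; rows
   outside [rstart,rend) are stashed and moved to their owners during
   MatAssemblyBegin/End().
*/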

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
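
/*
   Note on the consistency check above: the InsertMode values are bitwise
   ORed across ranks, so (assuming the usual PETSc encoding INSERT_VALUES=1,
   ADD_VALUES=2) any mix shows up as the impossible combined value
   (ADD_VALUES|INSERT_VALUES)=3.  For example:

     rank 0: MatSetValues(...,INSERT_VALUES);  rank 1: MatSetValues(...,ADD_VALUES);
     MPI_Allreduce(...,MPI_BOR,...)  ->  addv == 3  ->  error above
*/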

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_USE_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count the number of contributors to each process */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note below */
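  /* layout used below (a note, not code from this file): nprocs[2*j] counts
     the rows this process sends to process j, nprocs[2*j+1] is a 0/1 flag
     marking whether a message to j is needed at all, and owner[i] records
     the owning rank of rows[i].  Because callers often pass nearly sorted
     rows, the ownership search restarts at j=0 only when the row sequence
     decreases. */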
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_USE_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_USE_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processes of the number of messages and the max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for the rows going to
         the ith process
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and slow things down with many mallocs.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
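
/*
   Usage sketch (an illustration, not from this file): the rows are global
   indices and need not be owned by the calling process.

     PetscInt grows[] = {0, 5};                        hypothetical global rows
     ierr = MatZeroRows(A,2,grows,1.0);CHKERRQ(ierr);  zero them, 1.0 on the diagonal
*/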

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
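
/*
   The multiply above overlaps communication with computation; in block form
   (a restatement of the code, not new behavior):

     y = A_d * x_local + B_o * x_ghost

   where A_d is the diagonal block, B_o the off-diagonal block, and the
   ghost values x_ghost (a->lvec) arrive while A_d * x_local is computed.
*/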

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this relies on the values not actually
       being added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the neediest process requires */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
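
/*
   Binary layout produced above (as implied by the PetscBinaryWrite calls):
   a four-entry header [MAT_FILE_COOKIE, M, N, total nz], then all row
   lengths, then the global column indices (each local row emitted as:
   off-diagonal entries left of the diagonal block, the diagonal block,
   the remaining off-diagonal entries), then the values in the same order.
*/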

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt   inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar   *a;

    if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
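
/*
   Each outer iteration above does, in block form (a restatement of the
   code): gather the ghost values x_ghost, form bb1 = bb - B_o*x_ghost, then
   run the requested local SOR sweep(s) on the diagonal block A_d with
   right-hand side bb1.
*/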
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatPermute_MPIAIJ"
1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1258 {
1259   MPI_Comm       comm,pcomm;
1260   PetscInt       first,local_size,nrows,*rows;
1261   int            ntids;
1262   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1267   /* make a collective version of 'rowp' */
1268   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1269   if (pcomm==comm) {
1270     crowp = rowp;
1271   } else {
1272     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1273     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1274     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1275     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1276   }
1277   /* collect the global row permutation and invert it */
1278   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1279   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1280   if (pcomm!=comm) {
1281     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1282   }
1283   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1284   /* get the local target indices */
1285   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1286   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1287   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1288   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1289   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1290   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1291   /* the column permutation is so much easier;
1292      make a local version of 'colp' and invert it */
1293   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1294   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1295   if (ntids==1) {
1296     lcolp = colp;
1297   } else {
1298     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1299     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1300     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1301   }
1302   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); /* mark it before inverting, as done for the row permutation above */
1303   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1304   if (ntids>1) {
1305     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1306     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1307   }
1308   /* now we just get the submatrix */
1309   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1310   /* clean up */
1311   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1312   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
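/*
   Strategy: gather 'rowp' into a single global permutation, invert it, and
   keep the locally owned slice as the target row set; the column permutation
   only needs a local inversion. The permuted matrix then falls out of a
   single MatGetSubMatrix() call. A minimal caller-side sketch (hedged; rowp
   and colp are permutations of the row and column index sets):

       ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/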
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1318 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1319 {
1320   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1321   Mat            A = mat->A,B = mat->B;
1322   PetscErrorCode ierr;
1323   PetscReal      isend[5],irecv[5];
1324 
1325   PetscFunctionBegin;
1326   info->block_size     = 1.0;
1327   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1328   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1329   isend[3] = info->memory;  isend[4] = info->mallocs;
1330   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1331   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1332   isend[3] += info->memory;  isend[4] += info->mallocs;
1333   if (flag == MAT_LOCAL) {
1334     info->nz_used      = isend[0];
1335     info->nz_allocated = isend[1];
1336     info->nz_unneeded  = isend[2];
1337     info->memory       = isend[3];
1338     info->mallocs      = isend[4];
1339   } else if (flag == MAT_GLOBAL_MAX) {
1340     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1341     info->nz_used      = irecv[0];
1342     info->nz_allocated = irecv[1];
1343     info->nz_unneeded  = irecv[2];
1344     info->memory       = irecv[3];
1345     info->mallocs      = irecv[4];
1346   } else if (flag == MAT_GLOBAL_SUM) {
1347     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1348     info->nz_used      = irecv[0];
1349     info->nz_allocated = irecv[1];
1350     info->nz_unneeded  = irecv[2];
1351     info->memory       = irecv[3];
1352     info->mallocs      = irecv[4];
1353   }
1354   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1355   info->fill_ratio_needed = 0;
1356   info->factor_mallocs    = 0;
1357   info->rows_global       = (double)matin->rmap.N;
1358   info->columns_global    = (double)matin->cmap.N;
1359   info->rows_local        = (double)matin->rmap.n;
1360   info->columns_local     = (double)matin->cmap.n;
1361 
1362   PetscFunctionReturn(0);
1363 }
1364 
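/*
   The five statistics (nz_used, nz_allocated, nz_unneeded, memory, mallocs)
   are summed over the diagonal and off-diagonal blocks, then reduced with
   MPI_SUM or MPI_MAX across the communicator for the global flavors.
   A minimal usage sketch (hedged):

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, mallocs %g\n",
                          info.nz_used,info.mallocs);CHKERRQ(ierr);
*/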
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetOption_MPIAIJ"
1367 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1368 {
1369   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   switch (op) {
1374   case MAT_NEW_NONZERO_LOCATIONS:
1375   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1376   case MAT_KEEP_ZEROED_ROWS:
1377   case MAT_NEW_NONZERO_LOCATION_ERR:
1378   case MAT_USE_INODES:
1379   case MAT_IGNORE_ZERO_ENTRIES:
1380     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1381     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1382     break;
1383   case MAT_ROW_ORIENTED:
1384     a->roworiented = flg;
1385     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1386     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1387     break;
1388   case MAT_NEW_DIAGONALS:
1389     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1390     break;
1391   case MAT_IGNORE_OFF_PROC_ENTRIES:
1392     a->donotstash = PETSC_TRUE;
1393     break;
1394   case MAT_SYMMETRIC:
1395     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1396     break;
1397   case MAT_STRUCTURALLY_SYMMETRIC:
1398   case MAT_HERMITIAN:
1399   case MAT_SYMMETRY_ETERNAL:
1400     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1401     break;
1402   default:
1403     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
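/*
   Most options are forwarded verbatim to both sequential blocks; a few
   (MAT_ROW_ORIENTED, MAT_IGNORE_OFF_PROC_ENTRIES) also toggle state in the
   parallel wrapper itself. A minimal usage sketch (hedged; this development
   version of MatSetOption() carries an explicit PetscTruth flag):

       ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
*/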
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetRow_MPIAIJ"
1410 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1411 {
1412   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1413   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1414   PetscErrorCode ierr;
1415   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1416   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1417   PetscInt       *cmap,*idx_p;
1418 
1419   PetscFunctionBegin;
1420   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1421   mat->getrowactive = PETSC_TRUE;
1422 
1423   if (!mat->rowvalues && (idx || v)) {
1424     /*
1425         allocate enough space to hold information from the longest row.
1426     */
1427     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1428     PetscInt     max = 1,tmp;
1429     for (i=0; i<matin->rmap.n; i++) {
1430       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1431       if (max < tmp) { max = tmp; }
1432     }
1433     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1434     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1435   }
1436 
1437   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1438   lrow = row - rstart;
1439 
1440   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1441   if (!v)   {pvA = 0; pvB = 0;}
1442   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1443   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1444   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1445   nztot = nzA + nzB;
1446 
1447   cmap  = mat->garray;
1448   if (v  || idx) {
1449     if (nztot) {
1450       /* Sort by increasing column numbers, assuming A and B already sorted */
1451       PetscInt imark = -1;
1452       if (v) {
1453         *v = v_p = mat->rowvalues;
1454         for (i=0; i<nzB; i++) {
1455           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1456           else break;
1457         }
1458         imark = i;
1459         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1460         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1461       }
1462       if (idx) {
1463         *idx = idx_p = mat->rowindices;
1464         if (imark > -1) {
1465           for (i=0; i<imark; i++) {
1466             idx_p[i] = cmap[cworkB[i]];
1467           }
1468         } else {
1469           for (i=0; i<nzB; i++) {
1470             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1471             else break;
1472           }
1473           imark = i;
1474         }
1475         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1476         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1477       }
1478     } else {
1479       if (idx) *idx = 0;
1480       if (v)   *v   = 0;
1481     }
1482   }
1483   *nz = nztot;
1484   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1485   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
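/*
   The merge above interleaves the off-diagonal entries whose global column
   lies below cstart, then the diagonal-block entries, then the remaining
   off-diagonal entries, so the returned row is sorted by global column.
   A minimal caller-side sketch (hedged; 'row' must be locally owned, and the
   exact const-qualifiers on the output arrays vary between releases):

       PetscInt    ncols,*cols;
       PetscScalar *vals;
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* read-only use of cols[] and vals[] */
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/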
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1491 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1492 {
1493   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1494 
1495   PetscFunctionBegin;
1496   if (!aij->getrowactive) {
1497     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1498   }
1499   aij->getrowactive = PETSC_FALSE;
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatNorm_MPIAIJ"
1505 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1506 {
1507   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1508   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1509   PetscErrorCode ierr;
1510   PetscInt       i,j,cstart = mat->cmap.rstart;
1511   PetscReal      sum = 0.0;
1512   MatScalar      *v;
1513 
1514   PetscFunctionBegin;
1515   if (aij->size == 1) {
1516     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1517   } else {
1518     if (type == NORM_FROBENIUS) {
1519       v = amat->a;
1520       for (i=0; i<amat->nz; i++) {
1521 #if defined(PETSC_USE_COMPLEX)
1522         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1523 #else
1524         sum += (*v)*(*v); v++;
1525 #endif
1526       }
1527       v = bmat->a;
1528       for (i=0; i<bmat->nz; i++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1531 #else
1532         sum += (*v)*(*v); v++;
1533 #endif
1534       }
1535       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1536       *norm = sqrt(*norm);
1537     } else if (type == NORM_1) { /* max column norm */
1538       PetscReal *tmp,*tmp2;
1539       PetscInt  *jj,*garray = aij->garray;
1540       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1541       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1542       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1543       *norm = 0.0;
1544       v = amat->a; jj = amat->j;
1545       for (j=0; j<amat->nz; j++) {
1546         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1547       }
1548       v = bmat->a; jj = bmat->j;
1549       for (j=0; j<bmat->nz; j++) {
1550         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1551       }
1552       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1553       for (j=0; j<mat->cmap.N; j++) {
1554         if (tmp2[j] > *norm) *norm = tmp2[j];
1555       }
1556       ierr = PetscFree(tmp);CHKERRQ(ierr);
1557       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1558     } else if (type == NORM_INFINITY) { /* max row norm */
1559       PetscReal ntemp = 0.0;
1560       for (j=0; j<aij->A->rmap.n; j++) {
1561         v = amat->a + amat->i[j];
1562         sum = 0.0;
1563         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1564           sum += PetscAbsScalar(*v); v++;
1565         }
1566         v = bmat->a + bmat->i[j];
1567         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1568           sum += PetscAbsScalar(*v); v++;
1569         }
1570         if (sum > ntemp) ntemp = sum;
1571       }
1572       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1573     } else {
1574       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1575     }
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
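/*
   For entries a_ij the branches above compute
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2)  -- local block sums, MPI_SUM, sqrt
     NORM_1:         max_j sum_i |a_ij|     -- global column sums via MPI_SUM, then max
     NORM_INFINITY:  max_i sum_j |a_ij|     -- local row sums, then MPI_MAX
   The 2-norm is unsupported since it amounts to a largest-singular-value
   computation. A minimal usage sketch (hedged):

       PetscReal nrm;
       ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/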
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatTranspose_MPIAIJ"
1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1583 {
1584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1585   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1586   PetscErrorCode ierr;
1587   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1588   PetscInt       cstart=A->cmap.rstart,ncol;
1589   Mat            B;
1590   MatScalar      *array;
1591 
1592   PetscFunctionBegin;
1593   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1594 
1595   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1596   ai = Aloc->i; aj = Aloc->j;
1597   bi = Bloc->i; bj = Bloc->j;
1598   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1599     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1600     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1601     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1602     for (i=0; i<ai[ma]; i++){
1603       d_nnz[aj[i]] ++;
1604       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1605     }
1606 
1607     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1608     ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1609     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1610     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1611     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1612   } else {
1613     B = *matout;
1614   }
1615 
1616   /* copy over the A part */
1617   array = Aloc->a;
1618   row = A->rmap.rstart;
1619   for (i=0; i<ma; i++) {
1620     ncol = ai[i+1]-ai[i];
1621     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1622     row++; array += ncol; aj += ncol;
1623   }
1624   aj = Aloc->j;
1625   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1626 
1627   /* copy over the B part */
1628   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1629   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1630   array = Bloc->a;
1631   row = A->rmap.rstart;
1632   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1633   cols_tmp = cols;
1634   for (i=0; i<mb; i++) {
1635     ncol = bi[i+1]-bi[i];
1636     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1637     row++; array += ncol; cols_tmp += ncol;
1638   }
1639   ierr = PetscFree(cols);CHKERRQ(ierr);
1640 
1641   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1644     *matout = B;
1645   } else {
1646     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1647   }
1648   PetscFunctionReturn(0);
1649 }
1650 
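/*
   The transpose above is assembled through MatSetValues(): each locally owned
   row i of A is inserted as column i of B (note the swapped ncol/1 argument
   pair), so entries destined for other processes travel through the regular
   stash during assembly. d_nnz also serves as the o_nnz estimate, trading a
   possible over-allocation for one less round of communication.
*/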
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1653 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1654 {
1655   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1656   Mat            a = aij->A,b = aij->B;
1657   PetscErrorCode ierr;
1658   PetscInt       s1,s2,s3;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1662   if (rr) {
1663     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1664     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1665     /* Overlap communication with computation. */
1666     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667   }
1668   if (ll) {
1669     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1670     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1671     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1672   }
1673   /* scale the diagonal block */
1674   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1675 
1676   if (rr) {
1677     /* Do a scatter end and then right scale the off-diagonal block */
1678     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1680   }
1681 
1682   PetscFunctionReturn(0);
1683 }
1684 
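/*
   Forms diag(ll) * mat * diag(rr). Left scaling is purely local because each
   process owns whole rows; right scaling of the off-diagonal block needs the
   ghost entries of rr, so the scatter into lvec is started first and
   completed only after the diagonal block is scaled, overlapping
   communication with computation. A minimal usage sketch (hedged; either
   vector may be PETSC_NULL to skip that side):

       ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/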
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1687 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1688 {
1689   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1690   PetscErrorCode ierr;
1691 
1692   PetscFunctionBegin;
1693   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1694   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1699 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1700 {
1701   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1702   PetscErrorCode ierr;
1703 
1704   PetscFunctionBegin;
1705   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatEqual_MPIAIJ"
1711 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1712 {
1713   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1714   Mat            a,b,c,d;
1715   PetscTruth     flg;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   a = matA->A; b = matA->B;
1720   c = matB->A; d = matB->B;
1721 
1722   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1723   if (flg) {
1724     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1725   }
1726   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatCopy_MPIAIJ"
1732 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1733 {
1734   PetscErrorCode ierr;
1735   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1736   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1737 
1738   PetscFunctionBegin;
1739   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1740   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1741     /* because of the column compression in the off-processor part of the matrix a->B,
1742        the number of columns in a->B and b->B may be different, hence we cannot call
1743        the MatCopy() directly on the two parts. If need be, we can provide a more
1744        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1745        then copying the submatrices */
1746     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1747   } else {
1748     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1749     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1756 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1757 {
1758   PetscErrorCode ierr;
1759 
1760   PetscFunctionBegin;
1761   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #include "petscblaslapack.h"
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatAXPY_MPIAIJ"
1768 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1769 {
1770   PetscErrorCode ierr;
1771   PetscInt       i;
1772   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1773   PetscBLASInt   bnz,one=1;
1774   Mat_SeqAIJ     *x,*y;
1775 
1776   PetscFunctionBegin;
1777   if (str == SAME_NONZERO_PATTERN) {
1778     PetscScalar alpha = a;
1779     x = (Mat_SeqAIJ *)xx->A->data;
1780     y = (Mat_SeqAIJ *)yy->A->data;
1781     bnz = PetscBLASIntCast(x->nz);
1782     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1783     x = (Mat_SeqAIJ *)xx->B->data;
1784     y = (Mat_SeqAIJ *)yy->B->data;
1785     bnz = PetscBLASIntCast(x->nz);
1786     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1787   } else if (str == SUBSET_NONZERO_PATTERN) {
1788     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1789 
1790     x = (Mat_SeqAIJ *)xx->B->data;
1791     y = (Mat_SeqAIJ *)yy->B->data;
1792     if (y->xtoy && y->XtoY != xx->B) {
1793       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1794       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1795     }
1796     if (!y->xtoy) { /* get xtoy */
1797       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1798       y->XtoY = xx->B;
1799       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1800     }
1801     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1802   } else {
1803     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1804   }
1805   PetscFunctionReturn(0);
1806 }
1807 
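/*
   Computes Y <- a*X + Y. With SAME_NONZERO_PATTERN the two value arrays line
   up entry-for-entry, so one BLAS axpy per block suffices; with
   SUBSET_NONZERO_PATTERN a cached x-to-y index map (y->xtoy) routes each
   entry of X into Y; anything else falls back to the generic MatAXPY_Basic().
   A minimal usage sketch (hedged):

       ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/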
1808 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatConjugate_MPIAIJ"
1812 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1813 {
1814 #if defined(PETSC_USE_COMPLEX)
1815   PetscErrorCode ierr;
1816   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1817 
1818   PetscFunctionBegin;
1819   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1820   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1821 #else
1822   PetscFunctionBegin;
1823 #endif
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatRealPart_MPIAIJ"
1829 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1830 {
1831   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1832   PetscErrorCode ierr;
1833 
1834   PetscFunctionBegin;
1835   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1836   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1842 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1843 {
1844   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1845   PetscErrorCode ierr;
1846 
1847   PetscFunctionBegin;
1848   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1849   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #ifdef PETSC_HAVE_PBGL
1854 
1855 #include <boost/parallel/mpi/bsp_process_group.hpp>
1856 #include <boost/graph/distributed/ilu_default_graph.hpp>
1857 #include <boost/graph/distributed/ilu_0_block.hpp>
1858 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1859 #include <boost/graph/distributed/petsc/interface.hpp>
1860 #include <boost/multi_array.hpp>
1861 #include <boost/parallel/distributed_property_map.hpp>
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1865 /*
1866   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1867 */
1868 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1869 {
1870   namespace petsc = boost::distributed::petsc;
1871 
1872   namespace graph_dist = boost::graph::distributed;
1873   using boost::graph::distributed::ilu_default::process_group_type;
1874   using boost::graph::ilu_permuted;
1875 
1876   PetscTruth      row_identity, col_identity;
1877   PetscContainer  c;
1878   PetscInt        m, n, M, N;
1879   PetscErrorCode  ierr;
1880 
1881   PetscFunctionBegin;
1882   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1883   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1884   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1885   if (!row_identity || !col_identity) {
1886     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1887   }
1888 
1889   process_group_type pg;
1890   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1891   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1892   lgraph_type&   level_graph = *lgraph_p;
1893   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1894 
1895   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1896   ilu_permuted(level_graph);
1897 
1898   /* put together the new matrix */
1899   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1900   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1901   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1902   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1903   ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1904   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1905   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1906   (*fact)->factor = MAT_FACTOR_LU;
1907 
1908   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1909   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1910   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1916 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1917 {
1918   PetscFunctionBegin;
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatSolve_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1928 {
1929   namespace graph_dist = boost::graph::distributed;
1930 
1931   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1932   lgraph_type*   lgraph_p;
1933   PetscContainer c;
1934   PetscErrorCode ierr;
1935 
1936   PetscFunctionBegin;
1937   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1938   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1939   ierr = VecCopy(b, x); CHKERRQ(ierr);
1940 
1941   PetscScalar* array_x;
1942   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1943   PetscInt sx;
1944   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1945 
1946   PetscScalar* array_b;
1947   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1948   PetscInt sb;
1949   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1950 
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1955   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1956                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1957 
1958   typedef boost::iterator_property_map<array_ref_type::iterator,
1959                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1960   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1961                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1962 
1963   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1964 
       /* return the array leases taken by VecGetArray() above */
       ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
       ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 #endif
1968 
1969 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1970   PetscInt       nzlocal,nsends,nrecvs;
1971   PetscMPIInt    *send_rank;
1972   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1973   PetscScalar    *sbuf_a,**rbuf_a;
1974   PetscErrorCode (*MatDestroy)(Mat);
1975 } Mat_Redundant;
1976 
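/*
   The buffers cached here let a MAT_REUSE_MATRIX call of
   MatGetRedundantMatrix() skip the index (i,j) communication entirely and
   resend only the numerical values; MatDestroy() on the redundant matrix is
   intercepted (see MatDestroy_MatRedundant() below) so that this struct is
   released before the type's original destroy routine runs.
*/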
1977 #undef __FUNCT__
1978 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1979 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1980 {
1981   PetscErrorCode       ierr;
1982   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1983   PetscInt             i;
1984 
1985   PetscFunctionBegin;
1986   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1987   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1988   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1989   for (i=0; i<redund->nrecvs; i++){
1990     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1991     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1992   }
1993   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1994   ierr = PetscFree(redund);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatDestroy_MatRedundant"
2000 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2001 {
2002   PetscErrorCode  ierr;
2003   PetscContainer  container;
2004   Mat_Redundant   *redund=PETSC_NULL;
2005 
2006   PetscFunctionBegin;
2007   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2008   if (container) {
2009     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2010   } else {
2011     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2012   }
2013   A->ops->destroy = redund->MatDestroy;
2014   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2015   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2016   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2022 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2023 {
2024   PetscMPIInt    rank,size;
2025   MPI_Comm       comm=((PetscObject)mat)->comm;
2026   PetscErrorCode ierr;
2027   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2028   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2029   PetscInt       *rowrange=mat->rmap.range;
2030   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2031   Mat            A=aij->A,B=aij->B,C=*matredundant;
2032   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2033   PetscScalar    *sbuf_a;
2034   PetscInt       nzlocal=a->nz+b->nz;
2035   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2036   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
2037   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2038   MatScalar      *aworkA,*aworkB;
2039   PetscScalar    *vals;
2040   PetscMPIInt    tag1,tag2,tag3,imdex;
2041   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2042                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2043   MPI_Status     recv_status,*send_status;
2044   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2045   PetscInt       **rbuf_j=PETSC_NULL;
2046   PetscScalar    **rbuf_a=PETSC_NULL;
2047   Mat_Redundant  *redund=PETSC_NULL;
2048   PetscContainer container;
2049 
2050   PetscFunctionBegin;
2051   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2052   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2053 
2054   if (reuse == MAT_REUSE_MATRIX) {
2055     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2056     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2057     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2058     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2059     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2060     if (container) {
2061       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2062     } else {
2063       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2064     }
2065     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2066 
2067     nsends    = redund->nsends;
2068     nrecvs    = redund->nrecvs;
2069     send_rank = redund->send_rank; recv_rank = send_rank + size;
2070     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2071     sbuf_j    = redund->sbuf_j;
2072     sbuf_a    = redund->sbuf_a;
2073     rbuf_j    = redund->rbuf_j;
2074     rbuf_a    = redund->rbuf_a;
2075   }
2076 
2077   if (reuse == MAT_INITIAL_MATRIX){
2078     PetscMPIInt  subrank,subsize;
2079     PetscInt     nleftover,np_subcomm;
2080     /* get the destination processors' id send_rank, nsends and nrecvs */
2081     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2082     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2083     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2084     recv_rank = send_rank + size;
2085     np_subcomm = size/nsubcomm;
2086     nleftover  = size - nsubcomm*np_subcomm;
2087     nsends = 0; nrecvs = 0;
2088     for (i=0; i<size; i++){ /* i=rank*/
2089       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2090         send_rank[nsends] = i; nsends++;
2091         recv_rank[nrecvs++] = i;
2092       }
2093     }
2094     if (rank >= size - nleftover){/* this proc is a leftover processor */
2095       i = size-nleftover-1;
2096       j = 0;
2097       while (j < nsubcomm - nleftover){
2098         send_rank[nsends++] = i;
2099         i--; j++;
2100       }
2101     }
2102 
2103     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2104       for (i=0; i<nleftover; i++){
2105         recv_rank[nrecvs++] = size-nleftover+i;
2106       }
2107     }
2108 
2109     /* allocate sbuf_j, sbuf_a */
2110     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2111     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2112     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2113   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2114 
2115   /* copy mat's local entries into the buffers */
2116   if (reuse == MAT_INITIAL_MATRIX){
2117     rownz_max = 0;
2118     rptr = sbuf_j;
2119     cols = sbuf_j + rend-rstart + 1;
2120     vals = sbuf_a;
2121     rptr[0] = 0;
2122     for (i=0; i<rend-rstart; i++){
2123       row = i + rstart;
2124       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2125       ncols  = nzA + nzB;
2126       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2127       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2128       /* load the column indices for this row into cols */
2129       lwrite = 0;
2130       for (l=0; l<nzB; l++) {
2131         if ((ctmp = bmap[cworkB[l]]) < cstart){
2132           vals[lwrite]   = aworkB[l];
2133           cols[lwrite++] = ctmp;
2134         }
2135       }
2136       for (l=0; l<nzA; l++){
2137         vals[lwrite]   = aworkA[l];
2138         cols[lwrite++] = cstart + cworkA[l];
2139       }
2140       for (l=0; l<nzB; l++) {
2141         if ((ctmp = bmap[cworkB[l]]) >= cend){
2142           vals[lwrite]   = aworkB[l];
2143           cols[lwrite++] = ctmp;
2144         }
2145       }
2146       vals += ncols;
2147       cols += ncols;
2148       rptr[i+1] = rptr[i] + ncols;
2149       if (rownz_max < ncols) rownz_max = ncols;
2150     }
2151     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2152   } else { /* only copy matrix values into sbuf_a */
2153     rptr = sbuf_j;
2154     vals = sbuf_a;
2155     rptr[0] = 0;
2156     for (i=0; i<rend-rstart; i++){
2157       row = i + rstart;
2158       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2159       ncols  = nzA + nzB;
2160       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2161       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2162       lwrite = 0;
2163       for (l=0; l<nzB; l++) {
2164         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2165       }
2166       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2167       for (l=0; l<nzB; l++) {
2168         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2169       }
2170       vals += ncols;
2171       rptr[i+1] = rptr[i] + ncols;
2172     }
2173   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2174 
2175   /* send nzlocal to others, and recv other's nzlocal */
2176   /*--------------------------------------------------*/
2177   if (reuse == MAT_INITIAL_MATRIX){
2178     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2179     s_waits2 = s_waits3 + nsends;
2180     s_waits1 = s_waits2 + nsends;
2181     r_waits1 = s_waits1 + nsends;
2182     r_waits2 = r_waits1 + nrecvs;
2183     r_waits3 = r_waits2 + nrecvs;
2184   } else {
2185     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2186     r_waits3 = s_waits3 + nsends;
2187   }
2188 
2189   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2190   if (reuse == MAT_INITIAL_MATRIX){
2191     /* get new tags to keep the communication clean */
2192     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2193     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2194     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2195     rbuf_nz = sbuf_nz + nsends;
2196 
2197     /* post receives of other's nzlocal */
2198     for (i=0; i<nrecvs; i++){
2199       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2200     }
2201     /* send nzlocal to others */
2202     for (i=0; i<nsends; i++){
2203       sbuf_nz[i] = nzlocal;
2204       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2205     }
2206     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2207     count = nrecvs;
2208     while (count) {
2209       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2210       recv_rank[imdex] = recv_status.MPI_SOURCE;
2211       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2212       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2213 
2214       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2215       rbuf_nz[imdex] += i + 2;
2216       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2217       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2218       count--;
2219     }
2220     /* wait on sends of nzlocal */
2221     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2222     /* send mat->i,j to others, and recv from other's */
2223     /*------------------------------------------------*/
2224     for (i=0; i<nsends; i++){
2225       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2226       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2227     }
2228     /* wait on receives of mat->i,j */
2229     /*------------------------------*/
2230     count = nrecvs;
2231     while (count) {
2232       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2233       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2234       count--;
2235     }
2236     /* wait on sends of mat->i,j */
2237     /*---------------------------*/
2238     if (nsends) {
2239       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2240     }
2241   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2242 
2243   /* post receives, send and receive mat->a */
2244   /*----------------------------------------*/
2245   for (imdex=0; imdex<nrecvs; imdex++) {
2246     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2247   }
2248   for (i=0; i<nsends; i++){
2249     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2250   }
2251   count = nrecvs;
2252   while (count) {
2253     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2254     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2255     count--;
2256   }
2257   if (nsends) {
2258     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2259   }
2260 
2261   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2262 
2263   /* create redundant matrix */
2264   /*-------------------------*/
2265   if (reuse == MAT_INITIAL_MATRIX){
2266     /* compute rownz_max for preallocation */
2267     for (imdex=0; imdex<nrecvs; imdex++){
2268       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2269       rptr = rbuf_j[imdex];
2270       for (i=0; i<j; i++){
2271         ncols = rptr[i+1] - rptr[i];
2272         if (rownz_max < ncols) rownz_max = ncols;
2273       }
2274     }
2275 
2276     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2277     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2278     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2279     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2280     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2281   } else {
2282     C = *matredundant;
2283   }
2284 
2285   /* insert local matrix entries */
2286   rptr = sbuf_j;
2287   cols = sbuf_j + rend-rstart + 1;
2288   vals = sbuf_a;
2289   for (i=0; i<rend-rstart; i++){
2290     row   = i + rstart;
2291     ncols = rptr[i+1] - rptr[i];
2292     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2293     vals += ncols;
2294     cols += ncols;
2295   }
2296   /* insert received matrix entries */
2297   for (imdex=0; imdex<nrecvs; imdex++){
2298     rstart = rowrange[recv_rank[imdex]];
2299     rend   = rowrange[recv_rank[imdex]+1];
2300     rptr = rbuf_j[imdex];
2301     cols = rbuf_j[imdex] + rend-rstart + 1;
2302     vals = rbuf_a[imdex];
2303     for (i=0; i<rend-rstart; i++){
2304       row   = i + rstart;
2305       ncols = rptr[i+1] - rptr[i];
2306       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2307       vals += ncols;
2308       cols += ncols;
2309     }
2310   }
2311   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2312   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2313   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2314   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %D != input mat size %D",M,mat->rmap.N);
2315   if (reuse == MAT_INITIAL_MATRIX){
2316     PetscContainer container;
2317     *matredundant = C;
2318     /* create a supporting struct and attach it to C for reuse */
2319     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2320     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2321     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2322     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2323     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2324 
2325     redund->nzlocal = nzlocal;
2326     redund->nsends  = nsends;
2327     redund->nrecvs  = nrecvs;
2328     redund->send_rank = send_rank;
2329     redund->sbuf_nz = sbuf_nz;
2330     redund->sbuf_j  = sbuf_j;
2331     redund->sbuf_a  = sbuf_a;
2332     redund->rbuf_j  = rbuf_j;
2333     redund->rbuf_a  = rbuf_a;
2334 
2335     redund->MatDestroy = C->ops->destroy;
2336     C->ops->destroy    = MatDestroy_MatRedundant;
2337   }
2338   PetscFunctionReturn(0);
2339 }
2340 
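/*
   A minimal caller-side sketch (hedged: the MPI_Comm_split() color choice
   below is illustrative; any splitting consistent with the rank arithmetic
   used above will do). Afterwards each of the nsubcomm subcommunicators owns
   a complete copy C of mat, distributed over its own processes:

       MPI_Comm    subcomm;
       PetscMPIInt rank;
       Mat         C;
       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
       ierr = MPI_Comm_split(comm,rank%nsubcomm,rank,&subcomm);CHKERRQ(ierr);
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
*/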
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2343 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2344 {
2345   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2346   PetscErrorCode ierr;
2347   PetscInt       i,*idxb = 0;
2348   PetscScalar    *va,*vb;
2349   Vec            vtmp;
2350 
2351   PetscFunctionBegin;
2352   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2353   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2354   if (idx) {
2355     for (i=0; i<A->rmap.n; i++) { /* loop over the local rows; idx[] holds global column locations */
2356       if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;
2357     }
2358   }
2359 
2360   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
2361   if (idx) {
2362     ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2363   }
2364   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2365   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2366 
2367   for (i=0; i<A->rmap.n; i++){
2368     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2369       va[i] = vb[i];
2370       if (idx) idx[i] = a->garray[idxb[i]];
2371     }
2372   }
2373 
2374   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2375   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2376   if (idxb) {
2377     ierr = PetscFree(idxb);CHKERRQ(ierr);
2378   }
2379   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
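/*
   The row-wise maximum is taken blockwise above: the diagonal block supplies
   candidates (column indices shifted by cmap.rstart into global numbering),
   which are then compared row by row against the off-diagonal block, whose
   local column indices are translated through garray. A minimal caller-side
   sketch (hedged; comm and the cleanup are the caller's):

       PetscInt m,*idx;
       Vec      v;
       ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
       ierr = VecCreateMPI(comm,m,PETSC_DETERMINE,&v);CHKERRQ(ierr);
       ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
       ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
*/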
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2385 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2386 {
2387   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2388   PetscInt       n      = A->rmap.n;
2389   PetscInt       cstart = A->cmap.rstart;
2390   PetscInt      *cmap   = mat->garray;
2391   PetscInt      *diagIdx, *offdiagIdx;
2392   Vec            diagV, offdiagV;
2393   PetscScalar   *a, *diagA, *offdiagA;
2394   PetscInt       r;
2395   PetscErrorCode ierr;
2396 
2397   PetscFunctionBegin;
2398   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2399   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* sequential work vectors require a single-process communicator */
2400   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2401   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2402   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2403   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2404   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2405   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2406   for (r = 0; r < n; ++r) {
2407     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2408       a[r]   = diagA[r];
2409       idx[r] = cstart + diagIdx[r];
2410     } else {
2411       a[r]   = offdiagA[r];
2412       idx[r] = cmap[offdiagIdx[r]];
2413     }
2414   }
2415   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2416   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2417   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2418   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2419   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2420   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2426 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2427 {
2428   PetscErrorCode ierr;
2429 
2430   PetscFunctionBegin;
2431   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 /* -------------------------------------------------------------------*/
2436 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2437        MatGetRow_MPIAIJ,
2438        MatRestoreRow_MPIAIJ,
2439        MatMult_MPIAIJ,
2440 /* 4*/ MatMultAdd_MPIAIJ,
2441        MatMultTranspose_MPIAIJ,
2442        MatMultTransposeAdd_MPIAIJ,
2443 #ifdef PETSC_HAVE_PBGL
2444        MatSolve_MPIAIJ,
2445 #else
2446        0,
2447 #endif
2448        0,
2449        0,
2450 /*10*/ 0,
2451        0,
2452        0,
2453        MatRelax_MPIAIJ,
2454        MatTranspose_MPIAIJ,
2455 /*15*/ MatGetInfo_MPIAIJ,
2456        MatEqual_MPIAIJ,
2457        MatGetDiagonal_MPIAIJ,
2458        MatDiagonalScale_MPIAIJ,
2459        MatNorm_MPIAIJ,
2460 /*20*/ MatAssemblyBegin_MPIAIJ,
2461        MatAssemblyEnd_MPIAIJ,
2462        0,
2463        MatSetOption_MPIAIJ,
2464        MatZeroEntries_MPIAIJ,
2465 /*25*/ MatZeroRows_MPIAIJ,
2466        0,
2467 #ifdef PETSC_HAVE_PBGL
2468        MatLUFactorNumeric_MPIAIJ,
2469 #else
2470        0,
2471 #endif
2472        0,
2473        0,
2474 /*30*/ MatSetUpPreallocation_MPIAIJ,
2475 #ifdef PETSC_HAVE_PBGL
2476        MatILUFactorSymbolic_MPIAIJ,
2477 #else
2478        0,
2479 #endif
2480        0,
2481        0,
2482        0,
2483 /*35*/ MatDuplicate_MPIAIJ,
2484        0,
2485        0,
2486        0,
2487        0,
2488 /*40*/ MatAXPY_MPIAIJ,
2489        MatGetSubMatrices_MPIAIJ,
2490        MatIncreaseOverlap_MPIAIJ,
2491        MatGetValues_MPIAIJ,
2492        MatCopy_MPIAIJ,
2493 /*45*/ 0,
2494        MatScale_MPIAIJ,
2495        0,
2496        0,
2497        0,
2498 /*50*/ MatSetBlockSize_MPIAIJ,
2499        0,
2500        0,
2501        0,
2502        0,
2503 /*55*/ MatFDColoringCreate_MPIAIJ,
2504        0,
2505        MatSetUnfactored_MPIAIJ,
2506        MatPermute_MPIAIJ,
2507        0,
2508 /*60*/ MatGetSubMatrix_MPIAIJ,
2509        MatDestroy_MPIAIJ,
2510        MatView_MPIAIJ,
2511        0,
2512        0,
2513 /*65*/ 0,
2514        0,
2515        0,
2516        0,
2517        0,
2518 /*70*/ MatGetRowMaxAbs_MPIAIJ,
2519        0,
2520        MatSetColoring_MPIAIJ,
2521 #if defined(PETSC_HAVE_ADIC)
2522        MatSetValuesAdic_MPIAIJ,
2523 #else
2524        0,
2525 #endif
2526        MatSetValuesAdifor_MPIAIJ,
2527 /*75*/ 0,
2528        0,
2529        0,
2530        0,
2531        0,
2532 /*80*/ 0,
2533        0,
2534        0,
2535        0,
2536 /*84*/ MatLoad_MPIAIJ,
2537        0,
2538        0,
2539        0,
2540        0,
2541        0,
2542 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2543        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2544        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2545        MatPtAP_Basic,
2546        MatPtAPSymbolic_MPIAIJ,
2547 /*95*/ MatPtAPNumeric_MPIAIJ,
2548        0,
2549        0,
2550        0,
2551        0,
2552 /*100*/0,
2553        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2554        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2555        MatConjugate_MPIAIJ,
2556        0,
2557 /*105*/MatSetValuesRow_MPIAIJ,
2558        MatRealPart_MPIAIJ,
2559        MatImaginaryPart_MPIAIJ,
2560        0,
2561        0,
2562 /*110*/0,
2563        MatGetRedundantMatrix_MPIAIJ,
2564        MatGetRowMin_MPIAIJ,
2565        0,
2566        0,
2567 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2568 
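/*
   All public entry points dispatch through this table; e.g. MatMult(A,x,y)
   ends up calling (*A->ops->mult)(A,x,y), i.e. MatMult_MPIAIJ for this type.
   Zero slots either fall back to a generic implementation or raise
   PETSC_ERR_SUP; the numbered comments track slot positions in struct _MatOps.
*/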
2569 /* ----------------------------------------------------------------------------------------*/
2570 
2571 EXTERN_C_BEGIN
2572 #undef __FUNCT__
2573 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2574 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2575 {
2576   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2577   PetscErrorCode ierr;
2578 
2579   PetscFunctionBegin;
2580   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2581   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2582   PetscFunctionReturn(0);
2583 }
2584 EXTERN_C_END
2585 
2586 EXTERN_C_BEGIN
2587 #undef __FUNCT__
2588 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2589 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2590 {
2591   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2592   PetscErrorCode ierr;
2593 
2594   PetscFunctionBegin;
2595   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2596   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2597   PetscFunctionReturn(0);
2598 }
2599 EXTERN_C_END
2600 
2601 #include "petscpc.h"
2602 EXTERN_C_BEGIN
2603 #undef __FUNCT__
2604 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2605 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2606 {
2607   Mat_MPIAIJ     *b;
2608   PetscErrorCode ierr;
2609   PetscInt       i;
2610 
2611   PetscFunctionBegin;
2612   B->preallocated = PETSC_TRUE;
2613   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2614   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2615   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2616   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2617 
2618   B->rmap.bs = B->cmap.bs = 1;
2619   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2620   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2621   if (d_nnz) {
2622     for (i=0; i<B->rmap.n; i++) {
2623       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2624     }
2625   }
2626   if (o_nnz) {
2627     for (i=0; i<B->rmap.n; i++) {
2628       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2629     }
2630   }
2631   b = (Mat_MPIAIJ*)B->data;
2632 
2633   /* Explicitly create 2 MATSEQAIJ matrices. */
2634   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2635   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2636   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2637   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2638   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2639   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2640   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2641   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2642 
2643   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2644   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2645 
2646   PetscFunctionReturn(0);
2647 }
2648 EXTERN_C_END
2649 
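/*
   A minimal caller-side preallocation sketch (hedged: the per-row counts 5
   and 2 are illustrative, and m,n are local sizes chosen by the caller).
   d_nz/d_nnz bound the nonzeros per row landing in the diagonal block
   (columns owned by this process); o_nz/o_nnz bound those landing in the
   off-diagonal block:

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/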
2650 #undef __FUNCT__
2651 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2652 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2653 {
2654   Mat            mat;
2655   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2656   PetscErrorCode ierr;
2657 
2658   PetscFunctionBegin;
2659   *newmat       = 0;
2660   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2661   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2662   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2663   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2664   a    = (Mat_MPIAIJ*)mat->data;
2665 
2666   mat->factor       = matin->factor;
2667   mat->rmap.bs      = matin->rmap.bs;
2668   mat->assembled    = PETSC_TRUE;
2669   mat->insertmode   = NOT_SET_VALUES;
2670   mat->preallocated = PETSC_TRUE;
2671 
2672   a->size           = oldmat->size;
2673   a->rank           = oldmat->rank;
2674   a->donotstash     = oldmat->donotstash;
2675   a->roworiented    = oldmat->roworiented;
2676   a->rowindices     = 0;
2677   a->rowvalues      = 0;
2678   a->getrowactive   = PETSC_FALSE;
2679 
2680   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2681   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2682 
2683   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2684   if (oldmat->colmap) {
2685 #if defined (PETSC_USE_CTABLE)
2686     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2687 #else
2688     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2689     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2690     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2691 #endif
2692   } else a->colmap = 0;
2693   if (oldmat->garray) {
2694     PetscInt len;
2695     len  = oldmat->B->cmap.n;
2696     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2697     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2698     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2699   } else a->garray = 0;
2700 
2701   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2702   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2703   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2704   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2705   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2706   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2707   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2708   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2709   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2710   *newmat = mat;
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 #include "petscsys.h"
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatLoad_MPIAIJ"
2718 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2719 {
2720   Mat            A;
2721   PetscScalar    *vals,*svals;
2722   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2723   MPI_Status     status;
2724   PetscErrorCode ierr;
2725   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2726   PetscInt       i,nz,j,rstart,rend,mmax;
2727   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2728   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2729   PetscInt       cend,cstart,n,*rowners;
2730   int            fd;
2731 
2732   PetscFunctionBegin;
2733   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2734   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2735   if (!rank) {
2736     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2737     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2738     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2739   }
2740 
2741   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2742   M = header[1]; N = header[2];
2743   /* determine ownership of all rows */
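  /* each process gets M/size rows; the first M%size ranks take one extra row */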
2744   m    = M/size + ((M % size) > rank);
2745   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2746   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2747 
2748   /* First process needs enough room for process with most rows */
2749   if (!rank) {
2750     mmax       = rowners[1];
2751     for (i=2; i<=size; i++) {
2752       mmax = PetscMax(mmax,rowners[i]);
2753     }
2754   } else mmax = m;
2755 
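  /* turn the gathered per-process row counts into a prefix sum of global row-ownership offsets */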
2756   rowners[0] = 0;
2757   for (i=2; i<=size; i++) {
2758     rowners[i] += rowners[i-1];
2759   }
2760   rstart = rowners[rank];
2761   rend   = rowners[rank+1];
2762 
2763   /* distribute row lengths to all processors */
2764   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2765   if (!rank) {
2766     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2767     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2768     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2769     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2770     for (j=0; j<m; j++) {
2771       procsnz[0] += ourlens[j];
2772     }
2773     for (i=1; i<size; i++) {
2774       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2775       /* calculate the number of nonzeros on each processor */
2776       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2777         procsnz[i] += rowlengths[j];
2778       }
2779       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2780     }
2781     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2782   } else {
2783     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2784   }
2785 
2786   if (!rank) {
2787     /* determine max buffer needed and allocate it */
2788     maxnz = 0;
2789     for (i=0; i<size; i++) {
2790       maxnz = PetscMax(maxnz,procsnz[i]);
2791     }
2792     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2793 
2794     /* read in my part of the matrix column indices  */
2795     nz   = procsnz[0];
2796     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2797     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2798 
2799     /* read in everyone else's parts and ship them off */
2800     for (i=1; i<size; i++) {
2801       nz   = procsnz[i];
2802       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2803       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2804     }
2805     ierr = PetscFree(cols);CHKERRQ(ierr);
2806   } else {
2807     /* determine buffer space needed for message */
2808     nz = 0;
2809     for (i=0; i<m; i++) {
2810       nz += ourlens[i];
2811     }
2812     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2813 
2814     /* receive message of column indices */
2815     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2816     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2817     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2818   }
2819 
2820   /* determine column ownership if matrix is not square */
2821   if (N != M) {
2822     n      = N/size + ((N % size) > rank);
2823     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2824     cstart = cend - n;
2825   } else {
2826     cstart = rstart;
2827     cend   = rend;
2828     n      = cend - cstart;
2829   }
2830 
2831   /* loop over local rows, determining number of off diagonal entries */
2832   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2833   jj = 0;
2834   for (i=0; i<m; i++) {
2835     for (j=0; j<ourlens[i]; j++) {
2836       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2837       jj++;
2838     }
2839   }
2840 
2841   /* create our matrix */
2842   for (i=0; i<m; i++) {
2843     ourlens[i] -= offlens[i];
2844   }
2845   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2846   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2847   ierr = MatSetType(A,type);CHKERRQ(ierr);
2848   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2849 
2850   for (i=0; i<m; i++) {
2851     ourlens[i] += offlens[i];
2852   }
2853 
2854   if (!rank) {
2855     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2856 
2857     /* read in my part of the matrix numerical values  */
2858     nz   = procsnz[0];
2859     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2860 
2861     /* insert into matrix */
2862     jj      = rstart;
2863     smycols = mycols;
2864     svals   = vals;
2865     for (i=0; i<m; i++) {
2866       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2867       smycols += ourlens[i];
2868       svals   += ourlens[i];
2869       jj++;
2870     }
2871 
2872     /* read in other processors and ship out */
2873     for (i=1; i<size; i++) {
2874       nz   = procsnz[i];
2875       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2876       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2877     }
2878     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2879   } else {
2880     /* receive numeric values */
2881     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2882 
2883     /* receive message of values */
2884     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2885     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2886     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2887 
2888     /* insert into matrix */
2889     jj      = rstart;
2890     smycols = mycols;
2891     svals   = vals;
2892     for (i=0; i<m; i++) {
2893       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2894       smycols += ourlens[i];
2895       svals   += ourlens[i];
2896       jj++;
2897     }
2898   }
2899   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2900   ierr = PetscFree(vals);CHKERRQ(ierr);
2901   ierr = PetscFree(mycols);CHKERRQ(ierr);
2902   ierr = PetscFree(rowners);CHKERRQ(ierr);
2903 
2904   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2905   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2906   *newmat = A;
2907   PetscFunctionReturn(0);
2908 }
2909 
2910 #undef __FUNCT__
2911 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2912 /*
2913     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
2914   and then the final result built by concatenating the local matrices.
2915   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2916 */
2917 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2918 {
2919   PetscErrorCode ierr;
2920   PetscMPIInt    rank,size;
2921   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2922   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2923   Mat            *local,M,Mreuse;
2924   MatScalar      *vwork,*aa;
2925   MPI_Comm       comm = ((PetscObject)mat)->comm;
2926   Mat_SeqAIJ     *aij;
2927 
2928 
2929   PetscFunctionBegin;
2930   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2931   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2932 
2933   if (call ==  MAT_REUSE_MATRIX) {
2934     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2935     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2936     local = &Mreuse;
2937     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2938   } else {
2939     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2940     Mreuse = *local;
2941     ierr   = PetscFree(local);CHKERRQ(ierr);
2942   }
2943 
2944   /*
2945       m - number of local rows
2946       n - number of columns (same on all processors)
2947       rstart - first row in new global matrix generated
2948   */
2949   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2950   if (call == MAT_INITIAL_MATRIX) {
2951     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2952     ii  = aij->i;
2953     jj  = aij->j;
2954 
2955     /*
2956         Determine the number of non-zeros in the diagonal and off-diagonal
2957         portions of the matrix in order to do correct preallocation
2958     */
2959 
2960     /* first get start and end of "diagonal" columns */
2961     if (csize == PETSC_DECIDE) {
2962       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2963       if (mglobal == n) { /* square matrix */
2964 	nlocal = m;
2965       } else {
2966         nlocal = n/size + ((n % size) > rank);
2967       }
2968     } else {
2969       nlocal = csize;
2970     }
2971     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2972     rstart = rend - nlocal;
2973     if (rank == size - 1 && rend != n) {
2974       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2975     }
2976 
2977     /* next, compute all the lengths */
2978     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2979     olens = dlens + m;
2980     for (i=0; i<m; i++) {
2981       jend = ii[i+1] - ii[i];
2982       olen = 0;
2983       dlen = 0;
2984       for (j=0; j<jend; j++) {
2985         if (*jj < rstart || *jj >= rend) olen++;
2986         else dlen++;
2987         jj++;
2988       }
2989       olens[i] = olen;
2990       dlens[i] = dlen;
2991     }
2992     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2993     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2994     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2995     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2996     ierr = PetscFree(dlens);CHKERRQ(ierr);
2997   } else {
2998     PetscInt ml,nl;
2999 
3000     M = *newmat;
3001     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3002     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3003     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3004     /*
3005          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3006        rather than the slower MatSetValues().
3007     */
3008     M->was_assembled = PETSC_TRUE;
3009     M->assembled     = PETSC_FALSE;
3010   }
3011   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3012   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3013   ii  = aij->i;
3014   jj  = aij->j;
3015   aa  = aij->a;
3016   for (i=0; i<m; i++) {
3017     row   = rstart + i;
3018     nz    = ii[i+1] - ii[i];
3019     cwork = jj;     jj += nz;
3020     vwork = aa;     aa += nz;
3021     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3022   }
3023 
3024   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3025   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3026   *newmat = M;
3027 
3028   /* save submatrix used in processor for next request */
3029   if (call ==  MAT_INITIAL_MATRIX) {
3030     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3031     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3032   }
3033 
3034   PetscFunctionReturn(0);
3035 }
3036 
3037 EXTERN_C_BEGIN
3038 #undef __FUNCT__
3039 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3040 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3041 {
3042   PetscInt       m,cstart, cend,j,nnz,i,d;
3043   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3044   const PetscInt *JJ;
3045   PetscScalar    *values;
3046   PetscErrorCode ierr;
3047 
3048   PetscFunctionBegin;
3049   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3050 
3051   B->rmap.bs = B->cmap.bs = 1;
3052   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
3053   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
3054   m      = B->rmap.n;
3055   cstart = B->cmap.rstart;
3056   cend   = B->cmap.rend;
3057   rstart = B->rmap.rstart;
3058 
3059   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3060   o_nnz = d_nnz + m;
3061 
3062 #if defined(PETSC_USE_DEBUG)
3063   for (i=0; i<m; i++) {
3064     nnz     = Ii[i+1]- Ii[i];
3065     JJ      = J + Ii[i];
3066     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3067     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3068     if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
3069     for (j=1; j<nnz; j++) {
3070       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at position %D in its column indices",i,j);
3071     }
3072   }
3073 #endif
3074 
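  /* for each local row, count the columns falling inside [cstart,cend) (diagonal block)
     and those falling outside it (off-diagonal block) */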
3075   for (i=0; i<m; i++) {
3076     nnz     = Ii[i+1]- Ii[i];
3077     JJ      = J + Ii[i];
3078     nnz_max = PetscMax(nnz_max,nnz);
3079     for (j=0; j<nnz; j++) {
3080       if (*JJ >= cstart) break;
3081       JJ++;
3082     }
3083     d = 0;
3084     for (; j<nnz; j++) {
3085       if (*JJ++ >= cend) break;
3086       d++;
3087     }
3088     d_nnz[i] = d;
3089     o_nnz[i] = nnz - d;
3090   }
3091   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3092   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3093 
3094   if (v) values = (PetscScalar*)v;
3095   else {
3096     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3097     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3098   }
3099 
3100   for (i=0; i<m; i++) {
3101     ii   = i + rstart;
3102     nnz  = Ii[i+1]- Ii[i];
3103     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3104   }
3105   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3106   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3107 
3108   if (!v) {
3109     ierr = PetscFree(values);CHKERRQ(ierr);
3110   }
3111   PetscFunctionReturn(0);
3112 }
3113 EXTERN_C_END
3114 
3115 #undef __FUNCT__
3116 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3117 /*@
3118    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3119    (the default parallel PETSc format).
3120 
3121    Collective on MPI_Comm
3122 
3123    Input Parameters:
3124 +  B - the matrix
3125 .  i - the indices into j for the start of each local row (starts with zero)
3126 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3127 -  v - optional values in the matrix
3128 
3129    Level: developer
3130 
3131    Notes:
3132        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3133      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3134      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3135 
3136        The i and j indices are 0-based, and the i entries are offsets into the local j array.
3137 
3138        The format used for the sparse matrix input is equivalent to a
3139     row-major ordering, i.e., for the following matrix the input data expected is
3140     as shown:
3141 
3142         1 0 0
3143         2 0 3     P0
3144        -------
3145         4 5 6     P1
3146 
3147      Process0 [P0]: rows_owned=[0,1]
3148         i =  {0,1,3}  [size = nrow+1  = 2+1]
3149         j =  {0,0,2}  [size = nz = 3]
3150         v =  {1,2,3}  [size = nz = 3]
3151 
3152      Process1 [P1]: rows_owned=[2]
3153         i =  {0,3}    [size = nrow+1  = 1+1]
3154         j =  {0,1,2}  [size = nz = 3]
3155         v =  {4,5,6}  [size = nz = 3]
3156 
3157       The column indices for each row MUST be sorted.
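
      For example, a minimal calling sketch on process 0 of the example above
   (assuming B already has its type and local sizes set; the array names here
   are illustrative only):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve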
3158 
3159 .keywords: matrix, aij, compressed row, sparse, parallel
3160 
3161 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3162           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3163 @*/
3164 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3165 {
3166   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3167 
3168   PetscFunctionBegin;
3169   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3170   if (f) {
3171     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3172   }
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 #undef __FUNCT__
3177 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3178 /*@C
3179    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3180    (the default parallel PETSc format).  For good matrix assembly performance
3181    the user should preallocate the matrix storage by setting the parameters
3182    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3183    performance can be increased by more than a factor of 50.
3184 
3185    Collective on MPI_Comm
3186 
3187    Input Parameters:
3188 +  A - the matrix
3189 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3190            (same value is used for all local rows)
3191 .  d_nnz - array containing the number of nonzeros in the various rows of the
3192            DIAGONAL portion of the local submatrix (possibly different for each row)
3193            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3194            The size of this array is equal to the number of local rows, i.e 'm'.
3195            You must leave room for the diagonal entry even if it is zero.
3196 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3197            submatrix (same value is used for all local rows).
3198 -  o_nnz - array containing the number of nonzeros in the various rows of the
3199            OFF-DIAGONAL portion of the local submatrix (possibly different for
3200            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3201            structure. The size of this array is equal to the number
3202            of local rows, i.e 'm'.
3203 
3204    If the *_nnz parameter is given then the *_nz parameter is ignored
3205 
3206    The AIJ format (also called the Yale sparse matrix format or
3207    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3208    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3209 
3210    The parallel matrix is partitioned such that the first m0 rows belong to
3211    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3212    to process 2 etc., where m0,m1,m2... are given by the input parameter 'm'.
3213 
3214    The DIAGONAL portion of the local submatrix of a processor can be defined
3215    as the submatrix which is obtained by extracting the part corresponding
3216    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3217    first row that belongs to the processor, and r2 is the last row belonging
3218    to this processor. This is a square m x m matrix. The remaining portion
3219    of the local submatrix (m x (N-m)) constitutes the OFF-DIAGONAL portion.
3220 
3221    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3222 
3223    You can call MatGetInfo() to get information on how effective the preallocation was;
3224    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3225    You can also run with the option -info and look for messages with the string
3226    malloc in them to see if additional memory allocation was needed.
3227 
3228    Example usage:
3229 
3230    Consider the following 8x8 matrix with 34 non-zero values, that is
3231    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3232    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3233    as follows:
3234 
3235 .vb
3236             1  2  0  |  0  3  0  |  0  4
3237     Proc0   0  5  6  |  7  0  0  |  8  0
3238             9  0 10  | 11  0  0  | 12  0
3239     -------------------------------------
3240            13  0 14  | 15 16 17  |  0  0
3241     Proc1   0 18  0  | 19 20 21  |  0  0
3242             0  0  0  | 22 23  0  | 24  0
3243     -------------------------------------
3244     Proc2  25 26 27  |  0  0 28  | 29  0
3245            30  0  0  | 31 32 33  |  0 34
3246 .ve
3247 
3248    This can be represented as a collection of submatrices as:
3249 
3250 .vb
3251       A B C
3252       D E F
3253       G H I
3254 .ve
3255 
3256    Where the submatrices A,B,C are owned by proc0, D,E,F are
3257    owned by proc1, G,H,I are owned by proc2.
3258 
3259    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3260    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3261    The 'M','N' parameters are 8,8, and have the same values on all procs.
3262 
3263    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3264    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3265    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3266    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3267    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3268    matrix and [DF] as another SeqAIJ matrix.
3269 
3270    When d_nz, o_nz parameters are specified, d_nz storage elements are
3271    allocated for every row of the local diagonal submatrix, and o_nz
3272    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3273    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3274    per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3275    In this case, the values of d_nz,o_nz are:
3276 .vb
3277      proc0 : d_nz = 2, o_nz = 2
3278      proc1 : d_nz = 3, o_nz = 2
3279      proc2 : d_nz = 1, o_nz = 4
3280 .ve
3281    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3282    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3283    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3284    34 values.
3285 
3286    When d_nnz, o_nnz parameters are specified, the storage is specified
3287    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3288    In the above case the values for d_nnz,o_nnz are:
3289 .vb
3290      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3291      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3292      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3293 .ve
3294    Here the space allocated is the sum of all the above values, i.e., 34, and
3295    hence the preallocation is exact.
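
   For instance, with those arrays the call on proc0 is sketched below (the
   array names are illustrative only):

.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve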
3296 
3297    Level: intermediate
3298 
3299 .keywords: matrix, aij, compressed row, sparse, parallel
3300 
3301 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3302           MPIAIJ, MatGetInfo()
3303 @*/
3304 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3305 {
3306   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3307 
3308   PetscFunctionBegin;
3309   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3310   if (f) {
3311     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3312   }
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 #undef __FUNCT__
3317 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3318 /*@
3319      MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that
3320          contain the local rows in standard CSR format.
3321 
3322    Collective on MPI_Comm
3323 
3324    Input Parameters:
3325 +  comm - MPI communicator
3326 .  m - number of local rows (Cannot be PETSC_DECIDE)
3327 .  n - This value should be the same as the local size used in creating the
3328        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3329        calculated if N is given). For square matrices n is almost always m.
3330 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3331 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3332 .   i - row indices
3333 .   j - column indices
3334 -   a - matrix values
3335 
3336    Output Parameter:
3337 .   mat - the matrix
3338 
3339    Level: intermediate
3340 
3341    Notes:
3342        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3343      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3344      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3345 
3346        The i and j indices are 0-based, and the i entries are offsets into the local j array.
3347 
3348        The format used for the sparse matrix input is equivalent to a
3349     row-major ordering, i.e., for the following matrix the input data expected is
3350     as shown:
3351 
3352         1 0 0
3353         2 0 3     P0
3354        -------
3355         4 5 6     P1
3356 
3357      Process0 [P0]: rows_owned=[0,1]
3358         i =  {0,1,3}  [size = nrow+1  = 2+1]
3359         j =  {0,0,2}  [size = nz = 3]
3360         v =  {1,2,3}  [size = nz = 3]
3361 
3362      Process1 [P1]: rows_owned=[2]
3363         i =  {0,3}    [size = nrow+1  = 1+1]
3364         j =  {0,1,2}  [size = nz = 3]
3365         v =  {4,5,6}  [size = nz = 3]
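
      For example, a minimal calling sketch on process 0 of the example above
   (the array names are illustrative only):

.vb
     Mat         mat;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&mat);CHKERRQ(ierr);
.ve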
3366 
3367 .keywords: matrix, aij, compressed row, sparse, parallel
3368 
3369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3370           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3371 @*/
3372 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3373 {
3374   PetscErrorCode ierr;
3375 
3376   PetscFunctionBegin;
3377   if (i[0]) {
3378     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3379   }
3380   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3381   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3382   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3383   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3384   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3385   PetscFunctionReturn(0);
3386 }
3387 
3388 #undef __FUNCT__
3389 #define __FUNCT__ "MatCreateMPIAIJ"
3390 /*@C
3391    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3392    (the default parallel PETSc format).  For good matrix assembly performance
3393    the user should preallocate the matrix storage by setting the parameters
3394    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3395    performance can be increased by more than a factor of 50.
3396 
3397    Collective on MPI_Comm
3398 
3399    Input Parameters:
3400 +  comm - MPI communicator
3401 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3402            This value should be the same as the local size used in creating the
3403            y vector for the matrix-vector product y = Ax.
3404 .  n - This value should be the same as the local size used in creating the
3405        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3406        calculated if N is given). For square matrices n is almost always m.
3407 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3408 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3409 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3410            (same value is used for all local rows)
3411 .  d_nnz - array containing the number of nonzeros in the various rows of the
3412            DIAGONAL portion of the local submatrix (possibly different for each row)
3413            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3414            The size of this array is equal to the number of local rows, i.e 'm'.
3415            You must leave room for the diagonal entry even if it is zero.
3416 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3417            submatrix (same value is used for all local rows).
3418 -  o_nnz - array containing the number of nonzeros in the various rows of the
3419            OFF-DIAGONAL portion of the local submatrix (possibly different for
3420            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3421            structure. The size of this array is equal to the number
3422            of local rows, i.e 'm'.
3423 
3424    Output Parameter:
3425 .  A - the matrix
3426 
3427    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3428    MatXXXXSetPreallocation() paradigm instead of this routine directly; this is especially
3429    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3430    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3431 
3432    Notes:
3433    If the *_nnz parameter is given then the *_nz parameter is ignored
3434 
3435    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3436    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3437    storage requirements for this matrix.
3438 
3439    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3440    processor then it must be used on all processors that share the object for
3441    that argument.
3442 
3443    The user MUST specify either the local or global matrix dimensions
3444    (possibly both).
3445 
3446    The parallel matrix is partitioned across processors such that the
3447    first m0 rows belong to process 0, the next m1 rows belong to
3448    process 1, the next m2 rows belong to process 2 etc., where
3449    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
3450    values corresponding to an [m x N] submatrix.
3451 
3452    The columns are logically partitioned with the n0 columns belonging
3453    to the 0th partition, the next n1 columns belonging to the next
3454    partition etc., where n0,n1,n2... are given by the input parameter 'n'.
3455 
3456    The DIAGONAL portion of the local submatrix on any given processor
3457    is the submatrix formed by the rows and columns m,n owned by
3458    the given processor, i.e., the diagonal matrix on
3459    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3460    etc. The remaining portion of the local submatrix [m x (N-n)]
3461    constitute the OFF-DIAGONAL portion. The example below better
3462    illustrates this concept.
3463 
3464    For a square global matrix we define each processor's diagonal portion
3465    to be its local rows and the corresponding columns (a square submatrix);
3466    each processor's off-diagonal portion encompasses the remainder of the
3467    local matrix (a rectangular submatrix).
3468 
3469    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3470 
3471    When calling this routine with a single process communicator, a matrix of
3472    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3473    type of communicator, use the construction mechanism:
3474      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
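
   Spelled out, that construction sequence is sketched below (sizes and
   preallocation arguments as described above):

.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve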
3475 
3476    By default, this format uses inodes (identical nodes) when possible.
3477    We search for consecutive rows with the same nonzero structure, thereby
3478    reusing matrix information to achieve increased efficiency.
3479 
3480    Options Database Keys:
3481 +  -mat_no_inode  - Do not use inodes
3482 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3483 -  -mat_aij_oneindex - Internally use indexing starting at 1
3484         rather than 0.  Note that when calling MatSetValues(),
3485         the user still MUST index entries starting at 0!
3486 
3487 
3488    Example usage:
3489 
3490    Consider the following 8x8 matrix with 34 non-zero values, that is
3491    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3492    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3493    as follows:
3494 
3495 .vb
3496             1  2  0  |  0  3  0  |  0  4
3497     Proc0   0  5  6  |  7  0  0  |  8  0
3498             9  0 10  | 11  0  0  | 12  0
3499     -------------------------------------
3500            13  0 14  | 15 16 17  |  0  0
3501     Proc1   0 18  0  | 19 20 21  |  0  0
3502             0  0  0  | 22 23  0  | 24  0
3503     -------------------------------------
3504     Proc2  25 26 27  |  0  0 28  | 29  0
3505            30  0  0  | 31 32 33  |  0 34
3506 .ve
3507 
3508    This can be represented as a collection of submatrices as:
3509 
3510 .vb
3511       A B C
3512       D E F
3513       G H I
3514 .ve
3515 
3516    Where the submatrices A,B,C are owned by proc0, D,E,F are
3517    owned by proc1, G,H,I are owned by proc2.
3518 
3519    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3520    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3521    The 'M','N' parameters are 8,8, and have the same values on all procs.
3522 
3523    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3524    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3525    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3526    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3527    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3528    matrix and [DF] as another SeqAIJ matrix.
3529 
3530    When d_nz, o_nz parameters are specified, d_nz storage elements are
3531    allocated for every row of the local diagonal submatrix, and o_nz
3532    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3533    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3534    per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3535    In this case, the values of d_nz,o_nz are:
3536 .vb
3537      proc0 : d_nz = 2, o_nz = 2
3538      proc1 : d_nz = 3, o_nz = 2
3539      proc2 : d_nz = 1, o_nz = 4
3540 .ve
3541    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3542    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3543    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3544    34 values.
3545 
3546    When d_nnz, o_nnz parameters are specified, the storage is specified
3547    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3548    In the above case the values for d_nnz,o_nnz are:
3549 .vb
3550      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3551      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3552      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3553 .ve
3554    Here the space allocated is the sum of all the above values, i.e., 34, and
3555    hence the preallocation is exact.
3556 
3557    Level: intermediate
3558 
3559 .keywords: matrix, aij, compressed row, sparse, parallel
3560 
3561 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3562           MPIAIJ, MatCreateMPIAIJWithArrays()
3563 @*/
3564 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3565 {
3566   PetscErrorCode ierr;
3567   PetscMPIInt    size;
3568 
3569   PetscFunctionBegin;
3570   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3571   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3572   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3573   if (size > 1) {
3574     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3575     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3576   } else {
3577     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3578     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3579   }
3580   PetscFunctionReturn(0);
3581 }
3582 
3583 #undef __FUNCT__
3584 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3585 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3586 {
3587   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3588 
3589   PetscFunctionBegin;
3590   *Ad     = a->A;
3591   *Ao     = a->B;
3592   *colmap = a->garray;
3593   PetscFunctionReturn(0);
3594 }
3595 
3596 #undef __FUNCT__
3597 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3598 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3599 {
3600   PetscErrorCode ierr;
3601   PetscInt       i;
3602   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3603 
3604   PetscFunctionBegin;
3605   if (coloring->ctype == IS_COLORING_GLOBAL) {
3606     ISColoringValue *allcolors,*colors;
3607     ISColoring      ocoloring;
3608 
3609     /* set coloring for diagonal portion */
3610     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3611 
3612     /* set coloring for off-diagonal portion */
3613     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3614     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3615     for (i=0; i<a->B->cmap.n; i++) {
3616       colors[i] = allcolors[a->garray[i]];
3617     }
3618     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3619     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3620     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3621     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3622   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3623     ISColoringValue *colors;
3624     PetscInt        *larray;
3625     ISColoring      ocoloring;
3626 
3627     /* set coloring for diagonal portion */
3628     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3629     for (i=0; i<a->A->cmap.n; i++) {
3630       larray[i] = i + A->cmap.rstart;
3631     }
3632     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3633     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3634     for (i=0; i<a->A->cmap.n; i++) {
3635       colors[i] = coloring->colors[larray[i]];
3636     }
3637     ierr = PetscFree(larray);CHKERRQ(ierr);
3638     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3639     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3640     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3641 
3642     /* set coloring for off-diagonal portion */
3643     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3644     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3645     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3646     for (i=0; i<a->B->cmap.n; i++) {
3647       colors[i] = coloring->colors[larray[i]];
3648     }
3649     ierr = PetscFree(larray);CHKERRQ(ierr);
3650     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3651     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3652     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3653   } else {
3654     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3655   }
3656 
3657   PetscFunctionReturn(0);
3658 }
3659 
3660 #if defined(PETSC_HAVE_ADIC)
3661 #undef __FUNCT__
3662 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3663 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3664 {
3665   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3666   PetscErrorCode ierr;
3667 
3668   PetscFunctionBegin;
3669   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3670   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3671   PetscFunctionReturn(0);
3672 }
3673 #endif
3674 
3675 #undef __FUNCT__
3676 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3677 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3678 {
3679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3680   PetscErrorCode ierr;
3681 
3682   PetscFunctionBegin;
3683   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3684   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3685   PetscFunctionReturn(0);
3686 }
3687 
3688 #undef __FUNCT__
3689 #define __FUNCT__ "MatMerge"
3690 /*@
3691       MatMerge - Creates a single large parallel PETSc matrix by concatenating sequential
3692                  matrices from each processor
3693 
3694     Collective on MPI_Comm
3695 
3696    Input Parameters:
3697 +    comm - the communicator the parallel matrix will live on
3698 .    inmat - the input sequential matrix (one per process)
3699 .    n - number of local columns (or PETSC_DECIDE)
3700 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3701 
3702    Output Parameter:
3703 .    outmat - the parallel matrix generated
3704 
3705     Level: advanced
3706 
3707    Notes: The number of columns of the matrix in EACH processor MUST be the same.
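
      Note also that inmat is destroyed by this routine, so the caller must not
   use or destroy it afterwards. A typical calling sketch, with seq holding this
   process's rows (the names seq and par are illustrative only):

.vb
     ierr = MatMerge(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
.ve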
3708 
3709 @*/
3710 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3711 {
3712   PetscErrorCode ierr;
3713   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3714   PetscInt       *indx;
3715   PetscScalar    *values;
3716 
3717   PetscFunctionBegin;
3718   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3719   if (scall == MAT_INITIAL_MATRIX){
3720     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3721     if (n == PETSC_DECIDE){
3722       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3723     }
3724     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3725     rstart -= m;
3726 
3727     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3728     for (i=0;i<m;i++) {
3729       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3730       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3731       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3732     }
3733     /* This routine will ONLY return MPIAIJ type matrix */
3734     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3735     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3736     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3737     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3738     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3739 
3740   } else if (scall == MAT_REUSE_MATRIX){
3741     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3742   } else {
3743     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3744   }
3745 
3746   for (i=0;i<m;i++) {
3747     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3748     Ii    = i + rstart;
3749     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3750     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3751   }
3752   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3753   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3754   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3755 
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatFileSplit"
3761 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3762 {
3763   PetscErrorCode    ierr;
3764   PetscMPIInt       rank;
3765   PetscInt          m,N,i,rstart,nnz;
3766   size_t            len;
3767   const PetscInt    *indx;
3768   PetscViewer       out;
3769   char              *name;
3770   Mat               B;
3771   const PetscScalar *values;
3772 
3773   PetscFunctionBegin;
3774   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3775   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3776   /* Should this be the type of the diagonal block of A? */
3777   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3778   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3779   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3780   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3781   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3782   for (i=0;i<m;i++) {
3783     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3784     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3785     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3786   }
3787   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3788   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3789 
3790   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3791   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3792   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3793   sprintf(name,"%s.%d",outfile,rank);
3794   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3795   ierr = PetscFree(name);CHKERRQ(ierr);
3796   ierr = MatView(B,out);CHKERRQ(ierr);
3797   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3798   ierr = MatDestroy(B);CHKERRQ(ierr);
3799   PetscFunctionReturn(0);
3800 }
3801 
3802 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3803 #undef __FUNCT__
3804 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3805 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3806 {
3807   PetscErrorCode       ierr;
3808   Mat_Merge_SeqsToMPI  *merge;
3809   PetscContainer       container;
3810 
3811   PetscFunctionBegin;
3812   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3813   if (container) {
3814     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3815     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3816     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3817     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3818     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3819     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3820     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3821     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3822     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3823     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3824     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3825     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3826 
3827     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3828     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3829     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container exists */
3830   }
3831 
3832   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 #include "src/mat/utils/freespace.h"
3837 #include "petscbt.h"
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3841 /*@C
3842       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential
3843                  matrices from each processor
3844 
3845     Collective on MPI_Comm
3846 
3847    Input Parameters:
3848 +    comm - the communicator the parallel matrix will live on
3849 .    seqmat - the input sequential matrix (one per process)
3850 .    m - number of local rows (or PETSC_DECIDE)
3851 .    n - number of local columns (or PETSC_DECIDE)
3852 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3853 
3854    Output Parameter:
3855 .    mpimat - the parallel matrix generated
3856 
3857     Level: advanced
3858 
3859    Notes:
3860      The dimensions of the sequential matrix in each processor MUST be the same.
3861      The input seqmat is kept in the container "Mat_Merge_SeqsToMPI", and will be
3862      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
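
      The expected calling sequence is sketched below: the symbolic phase
   creates mpimat and the communication structure, then the numeric phase
   fills in the numerical values.

.vb
     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve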
3863 @*/
3864 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3865 {
3866   PetscErrorCode       ierr;
3867   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3868   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3869   PetscMPIInt          size,rank,taga,*len_s;
3870   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3871   PetscInt             proc,m;
3872   PetscInt             **buf_ri,**buf_rj;
3873   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3874   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3875   MPI_Request          *s_waits,*r_waits;
3876   MPI_Status           *status;
3877   MatScalar            *aa=a->a;
3878   MatScalar            **abuf_r,*ba_i;
3879   Mat_Merge_SeqsToMPI  *merge;
3880   PetscContainer       container;
3881 
3882   PetscFunctionBegin;
3883   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3884 
3885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3886   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3887 
3888   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3889   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
3890   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3892   bi     = merge->bi;
3893   bj     = merge->bj;
3894   buf_ri = merge->buf_ri;
3895   buf_rj = merge->buf_rj;
3896 
3897   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3898   owners = merge->rowmap.range;
3899   len_s  = merge->len_s;
3900 
3901   /* send and recv matrix values */
3902   /*-----------------------------*/
3903   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3904   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3905 
3906   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3907   for (proc=0,k=0; proc<size; proc++){
3908     if (!len_s[proc]) continue;
3909     i = owners[proc];
3910     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3911     k++;
3912   }
3913 
3914   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3915   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3916   ierr = PetscFree(status);CHKERRQ(ierr);
3917 
3918   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3919   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3920 
3921   /* insert mat values of mpimat */
3922   /*----------------------------*/
3923   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
3924   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3925   nextrow = buf_ri_k + merge->nrecv;
3926   nextai  = nextrow + merge->nrecv;
3927 
3928   for (k=0; k<merge->nrecv; k++){
3929     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3930     nrows = *(buf_ri_k[k]);
3931     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
3932     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
3933   }
3934 
3935   /* set values of ba */
3936   m = merge->rowmap.n;
3937   for (i=0; i<m; i++) {
3938     arow = owners[rank] + i;
3939     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3940     bnzi = bi[i+1] - bi[i];
3941     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3942 
3943     /* add local non-zero vals of this proc's seqmat into ba */
3944     anzi = ai[arow+1] - ai[arow];
3945     aj   = a->j + ai[arow];
3946     aa   = a->a + ai[arow];
3947     nextaj = 0;
3948     for (j=0; nextaj<anzi; j++){
3949       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3950         ba_i[j] += aa[nextaj++];
3951       }
3952     }
3953 
3954     /* add received vals into ba */
3955     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3956       /* i-th row */
3957       if (i == *nextrow[k]) {
3958         anzi = *(nextai[k]+1) - *nextai[k];
3959         aj   = buf_rj[k] + *(nextai[k]);
3960         aa   = abuf_r[k] + *(nextai[k]);
3961         nextaj = 0;
3962         for (j=0; nextaj<anzi; j++){
3963           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3964             ba_i[j] += aa[nextaj++];
3965           }
3966         }
3967         nextrow[k]++; nextai[k]++;
3968       }
3969     }
3970     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3971   }
3972   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3973   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3974 
3975   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3976   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3977   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3978   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3979   PetscFunctionReturn(0);
3980 }
3981 
3982 #undef __FUNCT__
3983 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3984 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3985 {
3986   PetscErrorCode       ierr;
3987   Mat                  B_mpi;
3988   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3989   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3990   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3991   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3992   PetscInt             len,proc,*dnz,*onz;
3993   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3994   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3995   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3996   MPI_Status           *status;
3997   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3998   PetscBT              lnkbt;
3999   Mat_Merge_SeqsToMPI  *merge;
4000   PetscContainer       container;
4001 
4002   PetscFunctionBegin;
4003   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4004 
4005   /* make sure it is a PETSc comm */
4006   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4007   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4008   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4009 
4010   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4011   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4012 
4013   /* determine row ownership */
4014   /*---------------------------------------------------------*/
4015   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4016   merge->rowmap.n = m;
4017   merge->rowmap.N = M;
4018   merge->rowmap.bs = 1;
4019   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4020   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4021   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4022 
4023   m      = merge->rowmap.n;
4024   M      = merge->rowmap.N;
4025   owners = merge->rowmap.range;
4026 
4027   /* determine the number of messages to send, their lengths */
4028   /*---------------------------------------------------------*/
4029   len_s  = merge->len_s;
4030 
4031   len = 0;  /* length of buf_si[] */
4032   merge->nsend = 0;
4033   for (proc=0; proc<size; proc++){
4034     len_si[proc] = 0;
4035     if (proc == rank){
4036       len_s[proc] = 0;
4037     } else {
4038       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4039       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4040     }
4041     if (len_s[proc]) {
4042       merge->nsend++;
4043       nrows = 0;
4044       for (i=owners[proc]; i<owners[proc+1]; i++){
4045         if (ai[i+1] > ai[i]) nrows++;
4046       }
4047       len_si[proc] = 2*(nrows+1);
4048       len += len_si[proc];
4049     }
4050   }
4051 
4052   /* determine the number and length of messages to receive for ij-structure */
4053   /*-------------------------------------------------------------------------*/
4054   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4055   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4056 
4057   /* post the Irecv of j-structure */
4058   /*-------------------------------*/
4059   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4060   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4061 
4062   /* post the Isend of j-structure */
4063   /*--------------------------------*/
4064   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4065   sj_waits = si_waits + merge->nsend;
4066 
4067   for (proc=0, k=0; proc<size; proc++){
4068     if (!len_s[proc]) continue;
4069     i = owners[proc];
4070     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4071     k++;
4072   }
4073 
4074   /* receives and sends of j-structure are complete */
4075   /*------------------------------------------------*/
4076   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4077   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4078 
4079   /* send and recv i-structure */
4080   /*---------------------------*/
4081   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4082   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4083 
4084   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4085   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4086   for (proc=0,k=0; proc<size; proc++){
4087     if (!len_s[proc]) continue;
4088     /* form outgoing message for i-structure:
4089          buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (local to the destination process)
4091                [nrows+1:2*nrows+1]: i-structure index
4092     */
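    /* For example, if the two nonzero rows destined for [proc] have
       destination-local indices 2 and 5 with 3 and 4 nonzeros respectively
       (a hypothetical illustration), then buf_si = {2, 2,5, 0,3,7} */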
4093     /*-------------------------------------------*/
4094     nrows = len_si[proc]/2 - 1;
4095     buf_si_i    = buf_si + nrows+1;
4096     buf_si[0]   = nrows;
4097     buf_si_i[0] = 0;
4098     nrows = 0;
4099     for (i=owners[proc]; i<owners[proc+1]; i++){
4100       anzi = ai[i+1] - ai[i];
4101       if (anzi) {
4102         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4103         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4104         nrows++;
4105       }
4106     }
4107     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4108     k++;
4109     buf_si += len_si[proc];
4110   }
4111 
4112   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4113   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4114 
4115   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4116   for (i=0; i<merge->nrecv; i++){
4117     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4118   }
4119 
4120   ierr = PetscFree(len_si);CHKERRQ(ierr);
4121   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4122   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4123   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4124   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4125   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4126   ierr = PetscFree(status);CHKERRQ(ierr);
4127 
4128   /* compute a local seq matrix in each processor */
4129   /*----------------------------------------------*/
4130   /* allocate bi array and free space for accumulating nonzero column info */
4131   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4132   bi[0] = 0;
4133 
4134   /* create and initialize a linked list */
4135   nlnk = N+1;
4136   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4137 
4138   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
4141   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4142   current_space = free_space;
4143 
4144   /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt*),&buf_ri_k);CHKERRQ(ierr);
4146   nextrow = buf_ri_k + merge->nrecv;
4147   nextai  = nextrow + merge->nrecv;
4148   for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4153   }
4154 
4155   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4156   len = 0;
4157   for (i=0;i<m;i++) {
4158     bnzi   = 0;
4159     /* add local non-zero cols of this proc's seqmat into lnk */
4160     arow   = owners[rank] + i;
4161     anzi   = ai[arow+1] - ai[arow];
4162     aj     = a->j + ai[arow];
4163     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4164     bnzi += nlnk;
4165     /* add received col data into lnk */
4166     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4167       if (i == *nextrow[k]) { /* i-th row */
4168         anzi = *(nextai[k]+1) - *nextai[k];
4169         aj   = buf_rj[k] + *nextai[k];
4170         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4171         bnzi += nlnk;
4172         nextrow[k]++; nextai[k]++;
4173       }
4174     }
4175     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4176 
4177     /* if free space is not available, make more free space */
4178     if (current_space->local_remaining<bnzi) {
4179       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4180       nspacedouble++;
4181     }
4182     /* copy data into free space, then initialize lnk */
4183     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4184     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4185 
4186     current_space->array           += bnzi;
4187     current_space->local_used      += bnzi;
4188     current_space->local_remaining -= bnzi;
4189 
4190     bi[i+1] = bi[i] + bnzi;
4191   }
4192 
4193   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4194 
4195   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4196   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4197   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4198 
4199   /* create symbolic parallel matrix B_mpi */
4200   /*---------------------------------------*/
4201   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4202   if (n==PETSC_DECIDE) {
4203     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4204   } else {
4205     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4206   }
4207   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4208   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4209   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4210 
4211   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4212   B_mpi->assembled     = PETSC_FALSE;
4213   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4214   merge->bi            = bi;
4215   merge->bj            = bj;
4216   merge->buf_ri        = buf_ri;
4217   merge->buf_rj        = buf_rj;
4218   merge->coi           = PETSC_NULL;
4219   merge->coj           = PETSC_NULL;
4220   merge->owners_co     = PETSC_NULL;
4221 
4222   /* attach the supporting struct to B_mpi for reuse */
4223   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4224   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4225   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4226   *mpimat = B_mpi;
4227 
4228   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4229   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4230   PetscFunctionReturn(0);
4231 }
4232 
4233 #undef __FUNCT__
4234 #define __FUNCT__ "MatMerge_SeqsToMPI"
4235 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4236 {
4237   PetscErrorCode   ierr;
4238 
4239   PetscFunctionBegin;
4240   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4241   if (scall == MAT_INITIAL_MATRIX){
4242     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4243   }
4244   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4245   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4246   PetscFunctionReturn(0);
4247 }
4248 
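/*
   Example usage of MatMerge_SeqsToMPI() (a sketch; assumes each process holds
   a SeqAIJ matrix seqmat of the full global size whose entries are merged,
   i.e. summed, into the parallel result, with this process owning m rows and
   n columns of that result):

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... change the numerical values of seqmat, keeping its nonzero pattern ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/
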
4249 #undef __FUNCT__
4250 #define __FUNCT__ "MatGetLocalMat"
4251 /*@
     MatGetLocalMat - Creates a SeqAIJ matrix from the local rows of an MPIAIJ matrix
4253 
4254     Not Collective
4255 
4256    Input Parameters:
4257 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4259 
4260    Output Parameter:
4261 .    A_loc - the local sequential matrix generated
4262 
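   Example usage (a minimal sketch; assumes A is an assembled MPIAIJ matrix):
.vb
   Mat Aloc;
   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
   /* ... after the numerical values of A change, refresh Aloc in place ... */
   ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
   ierr = MatDestroy(Aloc);CHKERRQ(ierr);
.ve
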
4263     Level: developer
4264 
4265 @*/
4266 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4267 {
4268   PetscErrorCode  ierr;
4269   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4270   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4271   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4272   MatScalar       *aa=a->a,*ba=b->a,*cam;
4273   PetscScalar     *ca;
4274   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
4275   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4276 
4277   PetscFunctionBegin;
4278   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4279   if (scall == MAT_INITIAL_MATRIX){
4280     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4281     ci[0] = 0;
4282     for (i=0; i<am; i++){
4283       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4284     }
4285     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4286     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4287     k = 0;
4288     for (i=0; i<am; i++) {
4289       ncols_o = bi[i+1] - bi[i];
4290       ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: columns before the diagonal block */
4292       for (jo=0; jo<ncols_o; jo++) {
4293         col = cmap[*bj];
4294         if (col >= cstart) break;
4295         cj[k]   = col; bj++;
4296         ca[k++] = *ba++;
4297       }
4298       /* diagonal portion of A */
4299       for (j=0; j<ncols_d; j++) {
4300         cj[k]   = cstart + *aj++;
4301         ca[k++] = *aa++;
4302       }
      /* off-diagonal portion of A: columns after the diagonal block */
4304       for (j=jo; j<ncols_o; j++) {
4305         cj[k]   = cmap[*bj++];
4306         ca[k++] = *ba++;
4307       }
4308     }
4309     /* put together the new matrix */
4310     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4311     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4312     /* Since these are PETSc arrays, change flags to free them as necessary. */
4313     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4314     mat->free_a  = PETSC_TRUE;
4315     mat->free_ij = PETSC_TRUE;
4316     mat->nonew   = 0;
4317   } else if (scall == MAT_REUSE_MATRIX){
4318     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4319     ci = mat->i; cj = mat->j; cam = mat->a;
4320     for (i=0; i<am; i++) {
      /* off-diagonal portion of A: columns before the diagonal block */
4322       ncols_o = bi[i+1] - bi[i];
4323       for (jo=0; jo<ncols_o; jo++) {
4324         col = cmap[*bj];
4325         if (col >= cstart) break;
4326         *cam++ = *ba++; bj++;
4327       }
4328       /* diagonal portion of A */
4329       ncols_d = ai[i+1] - ai[i];
4330       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A: columns after the diagonal block */
4332       for (j=jo; j<ncols_o; j++) {
4333         *cam++ = *ba++; bj++;
4334       }
4335     }
4336   } else {
4337     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4338   }
4339 
4340   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4341   PetscFunctionReturn(0);
4342 }
4343 
4344 #undef __FUNCT__
4345 #define __FUNCT__ "MatGetLocalMatCondensed"
4346 /*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of an MPIAIJ matrix, keeping only its NONZERO columns
4348 
4349     Not Collective
4350 
4351    Input Parameters:
4352 +    A - the matrix
4353 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4354 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4355 
4356    Output Parameter:
4357 .    A_loc - the local sequential matrix generated
4358 
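   Example usage (a minimal sketch; assumes A is an assembled MPIAIJ matrix and
   the default row and column index sets are used):
.vb
   Mat Aloc;
   ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
   ierr = MatDestroy(Aloc);CHKERRQ(ierr);
.ve
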
4359     Level: developer
4360 
4361 @*/
4362 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4363 {
4364   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4365   PetscErrorCode    ierr;
4366   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4367   IS                isrowa,iscola;
4368   Mat               *aloc;
4369 
4370   PetscFunctionBegin;
4371   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4372   if (!row){
4373     start = A->rmap.rstart; end = A->rmap.rend;
4374     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4375   } else {
4376     isrowa = *row;
4377   }
4378   if (!col){
4379     start = A->cmap.rstart;
4380     cmap  = a->garray;
4381     nzA   = a->A->cmap.n;
4382     nzB   = a->B->cmap.n;
4383     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4384     ncols = 0;
4385     for (i=0; i<nzB; i++) {
4386       if (cmap[i] < start) idx[ncols++] = cmap[i];
4387       else break;
4388     }
4389     imark = i;
4390     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4391     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4392     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4393     ierr = PetscFree(idx);CHKERRQ(ierr);
4394   } else {
4395     iscola = *col;
4396   }
4397   if (scall != MAT_INITIAL_MATRIX){
4398     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4399     aloc[0] = *A_loc;
4400   }
4401   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4402   *A_loc = aloc[0];
4403   ierr = PetscFree(aloc);CHKERRQ(ierr);
4404   if (!row){
4405     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4406   }
4407   if (!col){
4408     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4409   }
4410   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4411   PetscFunctionReturn(0);
4412 }
4413 
4414 #undef __FUNCT__
4415 #define __FUNCT__ "MatGetBrowsOfAcols"
4416 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local portion of A
4418 
4419     Collective on Mat
4420 
4421    Input Parameters:
4422 +    A,B - the matrices in mpiaij format
4423 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4424 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4425 
   Output Parameters:
4427 +    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index within B_seq at which B's B->rmap.n locally owned rows begin
4429 -    B_seq - the sequential matrix generated
4430 
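   Example usage (a minimal sketch; assumes A and B are assembled MPIAIJ
   matrices with compatible layouts):
.vb
   IS       rowb,colb;
   PetscInt brstart;
   Mat      Bseq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
   /* ... after the numerical values of B change, reuse the saved index sets ... */
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
   ierr = ISDestroy(rowb);CHKERRQ(ierr);
   ierr = ISDestroy(colb);CHKERRQ(ierr);
   ierr = MatDestroy(Bseq);CHKERRQ(ierr);
.ve
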
4431     Level: developer
4432 
4433 @*/
4434 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4435 {
4436   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4437   PetscErrorCode    ierr;
4438   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4439   IS                isrowb,iscolb;
4440   Mat               *bseq;
4441 
4442   PetscFunctionBegin;
4443   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4444     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4445   }
4446   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4447 
4448   if (scall == MAT_INITIAL_MATRIX){
4449     start = A->cmap.rstart;
4450     cmap  = a->garray;
4451     nzA   = a->A->cmap.n;
4452     nzB   = a->B->cmap.n;
4453     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4454     ncols = 0;
    for (i=0; i<nzB; i++) {  /* rows of B below this process's row range */
4456       if (cmap[i] < start) idx[ncols++] = cmap[i];
4457       else break;
4458     }
4459     imark = i;
4460     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above this process's row range */
4462     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4463     ierr = PetscFree(idx);CHKERRQ(ierr);
4464     *brstart = imark;
4465     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4466   } else {
4467     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4468     isrowb = *rowb; iscolb = *colb;
4469     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4470     bseq[0] = *B_seq;
4471   }
4472   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4473   *B_seq = bseq[0];
4474   ierr = PetscFree(bseq);CHKERRQ(ierr);
4475   if (!rowb){
4476     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4477   } else {
4478     *rowb = isrowb;
4479   }
4480   if (!colb){
4481     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4482   } else {
4483     *colb = iscolb;
4484   }
4485   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4486   PetscFunctionReturn(0);
4487 }
4488 
4489 #undef __FUNCT__
4490 #define __FUNCT__ "MatGetBrowsOfAoCols"
4491 /*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to
    the nonzero columns of the OFF-DIAGONAL portion of the local part of A
4494 
4495     Collective on Mat
4496 
4497    Input Parameters:
4498 +    A,B - the matrices in mpiaij format
4499 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4500 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4501 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4502 
4503    Output Parameter:
.    B_oth - the sequential matrix generated
4505 
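   Example usage (a minimal sketch; assumes A and B are assembled MPIAIJ
   matrices with compatible layouts):
.vb
   PetscInt  *startsj = PETSC_NULL;
   MatScalar *bufa    = PETSC_NULL;
   Mat       B_oth;
   ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
   /* ... after the numerical values of B change, reuse the saved buffers ... */
   ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve
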
4506     Level: developer
4507 
4508 @*/
4509 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4510 {
4511   VecScatter_MPI_General *gen_to,*gen_from;
4512   PetscErrorCode         ierr;
4513   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4514   Mat_SeqAIJ             *b_oth;
4515   VecScatter             ctx=a->Mvctx;
4516   MPI_Comm               comm=((PetscObject)ctx)->comm;
4517   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4518   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4519   PetscScalar            *rvalues,*svalues;
4520   MatScalar              *b_otha,*bufa,*bufA;
4521   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4522   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4523   MPI_Status             *sstatus,rstatus;
4524   PetscMPIInt            jj;
4525   PetscInt               *cols,sbs,rbs;
4526   PetscScalar            *vals;
4527 
4528   PetscFunctionBegin;
4529   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4531   }
4532   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4533   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4534 
4535   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4536   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4537   rvalues  = gen_from->values; /* holds the length of receiving row */
4538   svalues  = gen_to->values;   /* holds the length of sending row */
4539   nrecvs   = gen_from->n;
4540   nsends   = gen_to->n;
4541 
4542   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4543   srow     = gen_to->indices;   /* local row index to be sent */
4544   sstarts  = gen_to->starts;
4545   sprocs   = gen_to->procs;
4546   sstatus  = gen_to->sstatus;
4547   sbs      = gen_to->bs;
4548   rstarts  = gen_from->starts;
4549   rprocs   = gen_from->procs;
4550   rbs      = gen_from->bs;
4551 
4552   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4553   if (scall == MAT_INITIAL_MATRIX){
4554     /* i-array */
4555     /*---------*/
4556     /*  post receives */
4557     for (i=0; i<nrecvs; i++){
4558       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4559       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4560       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4561     }
4562 
4563     /* pack the outgoing message */
4564     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4565     rstartsj = sstartsj + nsends +1;
4566     sstartsj[0] = 0;  rstartsj[0] = 0;
4567     len = 0; /* total length of j or a array to be sent */
4568     k = 0;
4569     for (i=0; i<nsends; i++){
4570       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4571       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4572       for (j=0; j<nrows; j++) {
4573         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4574         for (l=0; l<sbs; l++){
4575           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4576           rowlen[j*sbs+l] = ncols;
4577           len += ncols;
4578           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4579         }
4580         k++;
4581       }
4582       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4583       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4584     }
4585     /* recvs and sends of i-array are completed */
4586     i = nrecvs;
4587     while (i--) {
4588       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4589     }
4590     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4591 
4592     /* allocate buffers for sending j and a arrays */
4593     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4594     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4595 
4596     /* create i-array of B_oth */
4597     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4598     b_othi[0] = 0;
4599     len = 0; /* total length of j or a array to be received */
4600     k = 0;
4601     for (i=0; i<nrecvs; i++){
4602       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4604       for (j=0; j<nrows; j++) {
4605         b_othi[k+1] = b_othi[k] + rowlen[j];
4606         len += rowlen[j]; k++;
4607       }
4608       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4609     }
4610 
    /* allocate space for j and a arrays of B_oth */
4612     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4613     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4614 
4615     /* j-array */
4616     /*---------*/
4617     /*  post receives of j-array */
4618     for (i=0; i<nrecvs; i++){
4619       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4620       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4621     }
4622 
4623     /* pack the outgoing message j-array */
4624     k = 0;
4625     for (i=0; i<nsends; i++){
4626       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4627       bufJ = bufj+sstartsj[i];
4628       for (j=0; j<nrows; j++) {
4629         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4630         for (ll=0; ll<sbs; ll++){
4631           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4632           for (l=0; l<ncols; l++){
4633             *bufJ++ = cols[l];
4634           }
4635           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4636         }
4637       }
4638       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4639     }
4640 
4641     /* recvs and sends of j-array are completed */
4642     i = nrecvs;
4643     while (i--) {
4644       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4645     }
4646     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4647   } else if (scall == MAT_REUSE_MATRIX){
4648     sstartsj = *startsj;
4649     rstartsj = sstartsj + nsends +1;
4650     bufa     = *bufa_ptr;
4651     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4652     b_otha   = b_oth->a;
4653   } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4655   }
4656 
4657   /* a-array */
4658   /*---------*/
4659   /*  post receives of a-array */
4660   for (i=0; i<nrecvs; i++){
4661     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4662     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4663   }
4664 
4665   /* pack the outgoing message a-array */
4666   k = 0;
4667   for (i=0; i<nsends; i++){
4668     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4669     bufA = bufa+sstartsj[i];
4670     for (j=0; j<nrows; j++) {
4671       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4672       for (ll=0; ll<sbs; ll++){
4673         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4674         for (l=0; l<ncols; l++){
4675           *bufA++ = vals[l];
4676         }
4677         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4678       }
4679     }
4680     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4681   }
4682   /* recvs and sends of a-array are completed */
4683   i = nrecvs;
4684   while (i--) {
4685     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4686   }
4687   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4688   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4689 
4690   if (scall == MAT_INITIAL_MATRIX){
4691     /* put together the new matrix */
4692     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4693 
4694     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4695     /* Since these are PETSc arrays, change flags to free them as necessary. */
4696     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4697     b_oth->free_a  = PETSC_TRUE;
4698     b_oth->free_ij = PETSC_TRUE;
4699     b_oth->nonew   = 0;
4700 
4701     ierr = PetscFree(bufj);CHKERRQ(ierr);
4702     if (!startsj || !bufa_ptr){
4703       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa, not bufa_ptr: free the send buffer allocated above */
4705     } else {
4706       *startsj  = sstartsj;
4707       *bufa_ptr = bufa;
4708     }
4709   }
4710   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4711   PetscFunctionReturn(0);
4712 }
4713 
4714 #undef __FUNCT__
4715 #define __FUNCT__ "MatGetCommunicationStructs"
4716 /*@C
4717   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4718 
4719   Not Collective
4720 
  Input Parameter:
4722 . A - The matrix in mpiaij format
4723 
  Output Parameters:
4725 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4726 . colmap - A map from global column index to local index into lvec
4727 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4728 
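  Example usage (a minimal sketch; assumes A is an assembled MPIAIJ matrix):
.vb
  Vec        lvec;
  VecScatter scat;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
.ve
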
4729   Level: developer
4730 
4731 @*/
4732 #if defined (PETSC_USE_CTABLE)
4733 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4734 #else
4735 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4736 #endif
4737 {
4738   Mat_MPIAIJ *a;
4739 
4740   PetscFunctionBegin;
4741   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
4745   a = (Mat_MPIAIJ *) A->data;
4746   if (lvec) *lvec = a->lvec;
4747   if (colmap) *colmap = a->colmap;
4748   if (multScatter) *multScatter = a->Mvctx;
4749   PetscFunctionReturn(0);
4750 }
4751 
4752 EXTERN_C_BEGIN
4753 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4754 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4755 EXTERN_C_END
4756 
4757 #include "src/mat/impls/dense/mpi/mpidense.h"
4758 
4759 #undef __FUNCT__
4760 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4761 /*
    Computes (B'*A')' since computing A*B directly is untenable
4763 
4764                n                       p                          p
4765         (              )       (              )         (                  )
4766       m (      A       )  *  n (       B      )   =   m (         C        )
4767         (              )       (              )         (                  )
4768 
4769 */
4770 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4771 {
4772   PetscErrorCode     ierr;
4773   Mat                At,Bt,Ct;
4774 
4775   PetscFunctionBegin;
4776   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4777   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4778   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4779   ierr = MatDestroy(At);CHKERRQ(ierr);
4780   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4781   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4782   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4783   PetscFunctionReturn(0);
4784 }
4785 
4786 #undef __FUNCT__
4787 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4788 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4789 {
4790   PetscErrorCode ierr;
4791   PetscInt       m=A->rmap.n,n=B->cmap.n;
4792   Mat            Cmat;
4793 
4794   PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %D != B->rmap.n %D\n",A->cmap.n,B->rmap.n);
4796   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4797   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4798   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4799   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4800   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4801   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4802   *C   = Cmat;
4803   PetscFunctionReturn(0);
4804 }
4805 
4806 /* ----------------------------------------------------------------*/
4807 #undef __FUNCT__
4808 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4809 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4810 {
4811   PetscErrorCode ierr;
4812 
4813   PetscFunctionBegin;
4814   if (scall == MAT_INITIAL_MATRIX){
4815     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4816   }
4817   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4818   PetscFunctionReturn(0);
4819 }
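
/*
   Example usage (a sketch; this routine is reached through MatMatMult() when
   A has type MPIDENSE and B has type MPIAIJ):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/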
4820 
4821 EXTERN_C_BEGIN
4822 #if defined(PETSC_HAVE_MUMPS)
4823 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4824 #endif
4825 #if defined(PETSC_HAVE_SUPERLU_DIST)
4826 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4827 #endif
4828 #if defined(PETSC_HAVE_SPOOLES)
4829 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4830 #endif
4831 EXTERN_C_END
4832 
4833 /*MC
4834    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4835 
4836    Options Database Keys:
4837 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4838 
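  Example (a sketch; d_nz and o_nz are illustrative per-row nonzero estimates
  for the diagonal and off-diagonal blocks):
.vb
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
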
4839   Level: beginner
4840 
4841 .seealso: MatCreateMPIAIJ()
4842 M*/
4843 
4844 EXTERN_C_BEGIN
4845 #undef __FUNCT__
4846 #define __FUNCT__ "MatCreate_MPIAIJ"
4847 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4848 {
4849   Mat_MPIAIJ     *b;
4850   PetscErrorCode ierr;
4851   PetscMPIInt    size;
4852 
4853   PetscFunctionBegin;
4854   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4855 
4856   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4857   B->data         = (void*)b;
4858   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4859   B->rmap.bs      = 1;
4860   B->assembled    = PETSC_FALSE;
4861   B->mapping      = 0;
4862 
4863   B->insertmode      = NOT_SET_VALUES;
4864   b->size            = size;
4865   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4866 
4867   /* build cache for off array entries formed */
4868   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4869   b->donotstash  = PETSC_FALSE;
4870   b->colmap      = 0;
4871   b->garray      = 0;
4872   b->roworiented = PETSC_TRUE;
4873 
4874   /* stuff used for matrix vector multiply */
4875   b->lvec      = PETSC_NULL;
4876   b->Mvctx     = PETSC_NULL;
4877 
4878   /* stuff for MatGetRow() */
4879   b->rowindices   = 0;
4880   b->rowvalues    = 0;
4881   b->getrowactive = PETSC_FALSE;
4882 
4883 #if defined(PETSC_HAVE_SPOOLES)
4884   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
4885                                      "MatGetFactor_mpiaij_spooles",
4886                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
4887 #endif
4888 #if defined(PETSC_HAVE_MUMPS)
4889   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
4890                                      "MatGetFactor_mpiaij_mumps",
4891                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
4892 #endif
4893 #if defined(PETSC_HAVE_SUPERLU_DIST)
4894   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
4895                                      "MatGetFactor_mpiaij_superlu_dist",
4896                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
4897 #endif
4898   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4899                                      "MatStoreValues_MPIAIJ",
4900                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4901   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4902                                      "MatRetrieveValues_MPIAIJ",
4903                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4904   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4905 				     "MatGetDiagonalBlock_MPIAIJ",
4906                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4907   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4908 				     "MatIsTranspose_MPIAIJ",
4909 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4910   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4911 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4912 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4913   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4914 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4915 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4916   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4917 				     "MatDiagonalScaleLocal_MPIAIJ",
4918 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4919   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4920                                      "MatConvert_MPIAIJ_MPICSRPERM",
4921                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4922   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4923                                      "MatConvert_MPIAIJ_MPICRL",
4924                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4925   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
4926                                      "MatMatMult_MPIDense_MPIAIJ",
4927                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
4928   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
4929                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
4930                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
4931   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
4932                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
4933                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
4934   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4935   PetscFunctionReturn(0);
4936 }
4937 EXTERN_C_END
4938 
4939 #undef __FUNCT__
4940 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4941 /*@
4942      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4943          and "off-diagonal" part of the matrix in CSR format.
4944 
4945    Collective on MPI_Comm
4946 
4947    Input Parameters:
4948 +  comm - MPI communicator
4949 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4955 .   i - row indices for "diagonal" portion of matrix
4956 .   j - column indices
4957 .   a - matrix values
4958 .   oi - row indices for "off-diagonal" portion of matrix
4959 .   oj - column indices
4960 -   oa - matrix values
4961 
4962    Output Parameter:
4963 .   mat - the matrix
4964 
4965    Level: advanced
4966 
4967    Notes:
4968        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4969 
4970        The i and j indices are 0 based
4971 
4972        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4973 
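       Example (a sketch for two processes, each owning one row of a 2x2 matrix;
       assumes rank holds this process's rank; note that the diagonal j indices
       are local while the off-diagonal oj indices are global, and the arrays
       must remain valid for the life of mat):
.vb
       PetscInt    i[2] = {0,1},j[1] = {0},oi[2] = {0,1},oj[1];
       PetscScalar a[1] = {2.0},oa[1] = {-1.0};
       oj[0] = 1 - rank; /* the single off-diagonal column */
       ierr  = MatCreateMPIAIJWithSplitArrays(comm,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve
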
4974 
4975 .keywords: matrix, aij, compressed row, sparse, parallel
4976 
4977 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4978           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4979 @*/
4980 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4981 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4982 {
4983   PetscErrorCode ierr;
4984   Mat_MPIAIJ     *maij;
4985 
  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4988   if (i[0]) {
4989     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4990   }
4991   if (oi[0]) {
4992     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4993   }
4994   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4995   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4996   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4997   maij = (Mat_MPIAIJ*) (*mat)->data;
4998   maij->donotstash     = PETSC_TRUE;
4999   (*mat)->preallocated = PETSC_TRUE;
5000 
5001   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
5002   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
5003   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
5004 
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.n,i,j,a,&maij->A);CHKERRQ(ierr); /* cmap.n handles n == PETSC_DECIDE */
5006   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5007 
5008   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5009   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5010   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5011   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5012 
5013   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5014   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5015   PetscFunctionReturn(0);
5016 }
5017 
5018 /*
5019     Special version for direct calls from Fortran
5020 */
5021 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5022 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5023 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5024 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5025 #endif
5026 
/* Change these macros so they can be used in a void function */
5028 #undef CHKERRQ
5029 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5030 #undef SETERRQ2
5031 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5032 #undef SETERRQ
5033 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5034 
5035 EXTERN_C_BEGIN
5036 #undef __FUNCT__
5037 #define __FUNCT__ "matsetvaluesmpiaij_"
5038 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5039 {
5040   Mat             mat = *mmat;
5041   PetscInt        m = *mm, n = *mn;
5042   InsertMode      addv = *maddv;
5043   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5044   PetscScalar     value;
5045   PetscErrorCode  ierr;
5046 
5047   MatPreallocated(mat);
5048   if (mat->insertmode == NOT_SET_VALUES) {
5049     mat->insertmode = addv;
5050   }
5051 #if defined(PETSC_USE_DEBUG)
5052   else if (mat->insertmode != addv) {
5053     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5054   }
5055 #endif
5056   {
5057   PetscInt        i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
5058   PetscInt        cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
5059   PetscTruth      roworiented = aij->roworiented;
5060 
5061   /* Some Variables required in the macro */
5062   Mat             A = aij->A;
5063   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5064   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5065   MatScalar       *aa = a->a;
5066   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5067   Mat             B = aij->B;
5068   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5069   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
5070   MatScalar       *ba = b->a;
5071 
5072   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5073   PetscInt        nonew = a->nonew;
5074   MatScalar       *ap1,*ap2;
5075 
5076   PetscFunctionBegin;
5077   for (i=0; i<m; i++) {
5078     if (im[i] < 0) continue;
5079 #if defined(PETSC_USE_DEBUG)
5080     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
5081 #endif
5082     if (im[i] >= rstart && im[i] < rend) {
5083       row      = im[i] - rstart;
5084       lastcol1 = -1;
5085       rp1      = aj + ai[row];
5086       ap1      = aa + ai[row];
5087       rmax1    = aimax[row];
5088       nrow1    = ailen[row];
5089       low1     = 0;
5090       high1    = nrow1;
5091       lastcol2 = -1;
5092       rp2      = bj + bi[row];
5093       ap2      = ba + bi[row];
5094       rmax2    = bimax[row];
5095       nrow2    = bilen[row];
5096       low2     = 0;
5097       high2    = nrow2;
5098 
5099       for (j=0; j<n; j++) {
5100         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5101         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5102         if (in[j] >= cstart && in[j] < cend){
5103           col = in[j] - cstart;
5104           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5105         } else if (in[j] < 0) continue;
5106 #if defined(PETSC_USE_DEBUG)
5107         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
5108 #endif
5109         else {
5110           if (mat->was_assembled) {
5111             if (!aij->colmap) {
5112               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5113             }
5114 #if defined (PETSC_USE_CTABLE)
5115             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5116 	    col--;
5117 #else
5118             col = aij->colmap[in[j]] - 1;
5119 #endif
5120             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5121               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5122               col =  in[j];
5123               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5124               B = aij->B;
5125               b = (Mat_SeqAIJ*)B->data;
5126               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5127               rp2      = bj + bi[row];
5128               ap2      = ba + bi[row];
5129               rmax2    = bimax[row];
5130               nrow2    = bilen[row];
5131               low2     = 0;
5132               high2    = nrow2;
5133               bm       = aij->B->rmap.n;
5134               ba = b->a;
5135             }
5136           } else col = in[j];
5137           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5138         }
5139       }
5140     } else {
5141       if (!aij->donotstash) {
5142         if (roworiented) {
5143           if (ignorezeroentries && v[i*n] == 0.0) continue;
5144           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5145         } else {
5146           if (ignorezeroentries && v[i] == 0.0) continue;
5147           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5148         }
5149       }
5150     }
5151   }}
5152   PetscFunctionReturnVoid();
5153 }
5154 EXTERN_C_END
5155 
5156