xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 01a79839fc82a7dabb7a87cd2a8bb532c6bfa88d)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
8 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
9 {
10   PetscErrorCode  ierr;
11   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
12   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
13   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
14   const PetscInt  *ia,*ib;
15   const MatScalar *aa,*bb;
16   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
17   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
18 
19   PetscFunctionBegin;
20   *keptrows = 0;
21   ia = a->i;
22   ib = b->i;
23   for (i=0; i<m; i++) {
24     na = ia[i+1] - ia[i];
25     nb = ib[i+1] - ib[i];
26     if (!na && !nb) {
27       cnt++;
28       goto ok1;
29     }
30     aa = a->a + ia[i];
31     for (j=0; j<na; j++) {
32       if (aa[j] != 0.0) goto ok1;
33     }
34     bb = b->a + ib[i];
35     for (j=0; j <nb; j++) {
36       if (bb[j] != 0.0) goto ok1;
37     }
38     cnt++;
39     ok1:;
40   }
41   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
42   if (!n0rows) PetscFunctionReturn(0);
43   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
44   cnt = 0;
45   for (i=0; i<m; i++) {
46     na = ia[i+1] - ia[i];
47     nb = ib[i+1] - ib[i];
48     if (!na && !nb) continue;
49     aa = a->a + ia[i];
50     for(j=0; j<na;j++) {
51       if (aa[j] != 0.0) {
52         rows[cnt++] = rstart + i;
53         goto ok2;
54       }
55     }
56     bb = b->a + ib[i];
57     for (j=0; j<nb; j++) {
58       if (bb[j] != 0.0) {
59         rows[cnt++] = rstart + i;
60         goto ok2;
61       }
62     }
63     ok2:;
64   }
65   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
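/*
   Usage sketch (illustrative only, not part of this file): a caller can use the
   IS produced above to drop the all-zero rows. Akept and keptrows are hypothetical
   names, and this assumes iscol = PETSC_NULL selects all columns in MatGetSubMatrix().

     IS  keptrows;
     Mat Akept;
     ierr = MatFindNonZeroRows_MPIAIJ(A,&keptrows);CHKERRQ(ierr);
     if (keptrows) {   a null IS means no process found an all-zero row
       ierr = MatGetSubMatrix(A,keptrows,PETSC_NULL,MAT_INITIAL_MATRIX,&Akept);CHKERRQ(ierr);
       ierr = ISDestroy(keptrows);CHKERRQ(ierr);
     }
*/
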
69 #undef __FUNCT__
70 #define __FUNCT__ "MatDistribute_MPIAIJ"
71 /*
72     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
73     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
74 
75     Only for square matrices
76 */
77 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
78 {
79   PetscMPIInt    rank,size;
80   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
81   PetscErrorCode ierr;
82   Mat            mat;
83   Mat_SeqAIJ     *gmata;
84   PetscMPIInt    tag;
85   MPI_Status     status;
86   PetscBool      aij;
87   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
88 
89   PetscFunctionBegin;
90   CHKMEMQ;
91   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
92   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
93   if (!rank) {
94     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
95     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s",((PetscObject)gmat)->type_name);
96   }
97   if (reuse == MAT_INITIAL_MATRIX) {
98     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
99     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
100     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
101     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
102     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
103     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
104     rowners[0] = 0;
105     for (i=2; i<=size; i++) {
106       rowners[i] += rowners[i-1];
107     }
108     rstart = rowners[rank];
109     rend   = rowners[rank+1];
110     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
111     if (!rank) {
112       gmata = (Mat_SeqAIJ*) gmat->data;
113       /* send the row lengths to the other processes */
114       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
115       for (i=1; i<size; i++) {
116         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
117       }
118       /* count the off-diagonal entries in each row (olens) and the entries left of the diagonal block (ld) */
119       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
120       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
121       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
122       jj = 0;
123       for (i=0; i<m; i++) {
124         for (j=0; j<dlens[i]; j++) {
125           if (gmata->j[jj] < rstart) ld[i]++;
126           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
127           jj++;
128         }
129       }
130       /* send column indices to other processes */
131       for (i=1; i<size; i++) {
132         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
133         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
134         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
135       }
136 
137       /* send numerical values to other processes */
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       gmataa = gmata->a;
143       gmataj = gmata->j;
144 
145     } else {
146       /* receive row lengths */
147       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
148       /* receive column indices */
149       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
150       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
151       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
152       /* count the off-diagonal entries in each row (olens) and the entries left of the diagonal block (ld) */
153       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
154       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
155       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
156       jj = 0;
157       for (i=0; i<m; i++) {
158         for (j=0; j<dlens[i]; j++) {
159           if (gmataj[jj] < rstart) ld[i]++;
160           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
161           jj++;
162         }
163       }
164       /* receive numerical values */
165       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
166       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
167     }
168     /* set preallocation */
169     for (i=0; i<m; i++) {
170       dlens[i] -= olens[i];
171     }
172     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
173     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
174 
175     for (i=0; i<m; i++) {
176       dlens[i] += olens[i];
177     }
178     cnt  = 0;
179     for (i=0; i<m; i++) {
180       row  = rstart + i;
181       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
182       cnt += dlens[i];
183     }
184     if (rank) {
185       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
186     }
187     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
188     ierr = PetscFree(rowners);CHKERRQ(ierr);
189     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
190     *inmat = mat;
191   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
192     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
193     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
194     mat   = *inmat;
195     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
196     if (!rank) {
197       /* send numerical values to other processes */
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
200       gmataa = gmata->a;
201       for (i=1; i<size; i++) {
202         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
203         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
204       }
205       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
206     } else {
207       /* receive numerical values from process 0 */
208       nz   = Ad->nz + Ao->nz;
209       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
210       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
211     }
212     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
213     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
214     ad = Ad->a;
215     ao = Ao->a;
216     if (mat->rmap->n) {
217       i  = 0;
218       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
219       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
220     }
221     for (i=1; i<mat->rmap->n; i++) {
222       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
223       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
224     }
225     i--;
226     if (mat->rmap->n) {
227       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
228     }
229     if (rank) {
230       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
231     }
232   }
233   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
235   CHKMEMQ;
236   PetscFunctionReturn(0);
237 }
238 
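/*
   Usage sketch for the routine above (hypothetical driver code): rank 0
   assembles a global SeqAIJ matrix gmat, every rank passes its local row
   count m, and each rank receives its slice of the distributed copy.
   comm, rank, N, nnz, and m are assumed to be set up by the caller.

     Mat gmat = PETSC_NULL,dmat;
     if (!rank) {
       ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,N,N,0,nnz,&gmat);CHKERRQ(ierr);
       ... fill and assemble gmat ...
     }
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ... later, to move over new numerical values only ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/
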
239 /*
240   Local utility routine that creates a mapping from the global column
241 number to the local number in the off-diagonal part of the local
242 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
243 at a slightly higher hash-table cost; without it, it is not scalable (each
244 process stores an order-N integer array) but access is fast.
245 */
246 #undef __FUNCT__
247 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
248 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
249 {
250   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
251   PetscErrorCode ierr;
252   PetscInt       n = aij->B->cmap->n,i;
253 
254   PetscFunctionBegin;
255 #if defined (PETSC_USE_CTABLE)
256   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
257   for (i=0; i<n; i++){
258     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
259   }
260 #else
261   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
262   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
263   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
264   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
265 #endif
266   PetscFunctionReturn(0);
267 }
268 
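/*
   Lookup sketch: translating a global column gcol (a hypothetical variable)
   to a local column of the off-diagonal block B using the map built above;
   this mirrors the lookups in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
   below.

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;               the table stores lcol+1 so that 0 can mean "absent"
   #else
     lcol = aij->colmap[gcol] - 1;          the array likewise stores lcol+1
   #endif
     A resulting lcol < 0 means gcol is not a column of B on this process.
*/
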
269 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
270 { \
271     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
272     lastcol1 = col;\
273     while (high1-low1 > 5) { \
274       t = (low1+high1)/2; \
275       if (rp1[t] > col) high1 = t; \
276       else             low1  = t; \
277     } \
278       for (_i=low1; _i<high1; _i++) { \
279         if (rp1[_i] > col) break; \
280         if (rp1[_i] == col) { \
281           if (addv == ADD_VALUES) ap1[_i] += value;   \
282           else                    ap1[_i] = value; \
283           goto a_noinsert; \
284         } \
285       }  \
286       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
287       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
288       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
289       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
290       N = nrow1++ - 1; a->nz++; high1++; \
291       /* shift up all the later entries in this row */ \
292       for (ii=N; ii>=_i; ii--) { \
293         rp1[ii+1] = rp1[ii]; \
294         ap1[ii+1] = ap1[ii]; \
295       } \
296       rp1[_i] = col;  \
297       ap1[_i] = value;  \
298       a_noinsert: ; \
299       ailen[row] = nrow1; \
300 }
301 
302 
303 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
304 { \
305     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
306     lastcol2 = col;\
307     while (high2-low2 > 5) { \
308       t = (low2+high2)/2; \
309       if (rp2[t] > col) high2 = t; \
310       else             low2  = t; \
311     } \
312     for (_i=low2; _i<high2; _i++) {		\
313       if (rp2[_i] > col) break;			\
314       if (rp2[_i] == col) {			      \
315 	if (addv == ADD_VALUES) ap2[_i] += value;     \
316 	else                    ap2[_i] = value;      \
317 	goto b_noinsert;			      \
318       }						      \
319     }							      \
320     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
321     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
322     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
323     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
324     N = nrow2++ - 1; b->nz++; high2++;					\
325     /* shift up all the later entries in this row */			\
326     for (ii=N; ii>=_i; ii--) {						\
327       rp2[ii+1] = rp2[ii];						\
328       ap2[ii+1] = ap2[ii];						\
329     }									\
330     rp2[_i] = col;							\
331     ap2[_i] = value;							\
332     b_noinsert: ;								\
333     bilen[row] = nrow2;							\
334 }
335 
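/*
   Both macros above use the same technique: a binary search that stops once
   the window is at most 5 wide and finishes with a linear scan, followed by
   a shift-up insertion when the column is new.  A minimal standalone sketch
   on a sorted array cols[]/vals[] of length nrow (illustrative only, without
   the reallocation and error handling of the macros):

     PetscInt low = 0,high = nrow,t,k;
     while (high-low > 5) {
       t = (low+high)/2;
       if (cols[t] > col) high = t;
       else               low  = t;
     }
     for (k=low; k<high; k++) {
       if (cols[k] >= col) break;
     }
     if (k < nrow && cols[k] == col) vals[k] += value;        existing entry
     else {                                              insert at position k
       for (t=nrow-1; t>=k; t--) {cols[t+1] = cols[t]; vals[t+1] = vals[t];}
       cols[k] = col; vals[k] = value; nrow++;
     }
*/
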
336 #undef __FUNCT__
337 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
338 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
339 {
340   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
341   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
342   PetscErrorCode ierr;
343   PetscInt       l,*garray = mat->garray,diag;
344 
345   PetscFunctionBegin;
346   /* code only works for square matrices A */
347 
348   /* find the number of entries in this row left of the diagonal block */
349   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
350   row  = row - diag;
351   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
352     if (garray[b->j[b->i[row]+l]] > diag) break;
353   }
354   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
355 
356   /* diagonal part */
357   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
358 
359   /* right of diagonal part */
360   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
361   PetscFunctionReturn(0);
362 }
363 
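/*
   Usage note for the routine above: v must hold the complete owned row with
   its entries in increasing global column order, i.e.

     v = [ B entries with column < cstart | all A entries | B entries with column >= cend ]

   so a caller holding a full row in that order can simply do

     ierr = MatSetValuesRow(A,grow,v);CHKERRQ(ierr);     grow = a locally owned global row
*/
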
364 #undef __FUNCT__
365 #define __FUNCT__ "MatSetValues_MPIAIJ"
366 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
367 {
368   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
369   PetscScalar    value;
370   PetscErrorCode ierr;
371   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
372   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
373   PetscBool      roworiented = aij->roworiented;
374 
375   /* some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
376   Mat            A = aij->A;
377   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
378   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
379   MatScalar      *aa = a->a;
380   PetscBool      ignorezeroentries = a->ignorezeroentries;
381   Mat            B = aij->B;
382   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
383   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
384   MatScalar      *ba = b->a;
385 
386   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
387   PetscInt       nonew = a->nonew;
388   MatScalar      *ap1,*ap2;
389 
390   PetscFunctionBegin;
391   if (v) PetscValidScalarPointer(v,6);
392   for (i=0; i<m; i++) {
393     if (im[i] < 0) continue;
394 #if defined(PETSC_USE_DEBUG)
395     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
396 #endif
397     if (im[i] >= rstart && im[i] < rend) {
398       row      = im[i] - rstart;
399       lastcol1 = -1;
400       rp1      = aj + ai[row];
401       ap1      = aa + ai[row];
402       rmax1    = aimax[row];
403       nrow1    = ailen[row];
404       low1     = 0;
405       high1    = nrow1;
406       lastcol2 = -1;
407       rp2      = bj + bi[row];
408       ap2      = ba + bi[row];
409       rmax2    = bimax[row];
410       nrow2    = bilen[row];
411       low2     = 0;
412       high2    = nrow2;
413 
414       for (j=0; j<n; j++) {
415         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
416         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
417         if (in[j] >= cstart && in[j] < cend){
418           col = in[j] - cstart;
419           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
420         } else if (in[j] < 0) continue;
421 #if defined(PETSC_USE_DEBUG)
422         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
423 #endif
424         else {
425           if (mat->was_assembled) {
426             if (!aij->colmap) {
427               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
428             }
429 #if defined (PETSC_USE_CTABLE)
430             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
431 	    col--;
432 #else
433             col = aij->colmap[in[j]] - 1;
434 #endif
435             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
436               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
437               col =  in[j];
438               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
439               B = aij->B;
440               b = (Mat_SeqAIJ*)B->data;
441               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
442               rp2      = bj + bi[row];
443               ap2      = ba + bi[row];
444               rmax2    = bimax[row];
445               nrow2    = bilen[row];
446               low2     = 0;
447               high2    = nrow2;
448               bm       = aij->B->rmap->n;
449               ba = b->a;
450             }
451           } else col = in[j];
452           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
453         }
454       }
455     } else {
456       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
457       if (!aij->donotstash) {
458         if (roworiented) {
459           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
460         } else {
461           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
462         }
463       }
464     }
465   }
466   PetscFunctionReturn(0);
467 }
468 
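/*
   Usage sketch: rows owned by another process are stashed above and shipped
   during assembly, so a caller may set any entry (unless
   MAT_NO_OFF_PROC_ENTRIES was requested) and then assemble:

     PetscInt    row = 0,col = 0;        row may belong to another process
     PetscScalar v   = 1.0;
     ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
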
469 #undef __FUNCT__
470 #define __FUNCT__ "MatGetValues_MPIAIJ"
471 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
472 {
473   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
474   PetscErrorCode ierr;
475   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
476   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
477 
478   PetscFunctionBegin;
479   for (i=0; i<m; i++) {
480     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
481     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
482     if (idxm[i] >= rstart && idxm[i] < rend) {
483       row = idxm[i] - rstart;
484       for (j=0; j<n; j++) {
485         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
486         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
487         if (idxn[j] >= cstart && idxn[j] < cend){
488           col = idxn[j] - cstart;
489           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
490         } else {
491           if (!aij->colmap) {
492             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
493           }
494 #if defined (PETSC_USE_CTABLE)
495           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
496           col --;
497 #else
498           col = aij->colmap[idxn[j]] - 1;
499 #endif
500           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
501           else {
502             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
503           }
504         }
505       }
506     } else {
507       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
508     }
509   }
510   PetscFunctionReturn(0);
511 }
512 
513 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
514 
515 #undef __FUNCT__
516 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
517 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
518 {
519   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
520   PetscErrorCode ierr;
521   PetscInt       nstash,reallocs;
522   InsertMode     addv;
523 
524   PetscFunctionBegin;
525   if (aij->donotstash || mat->nooffprocentries) {
526     PetscFunctionReturn(0);
527   }
528 
529   /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
530   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
531   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
532   mat->insertmode = addv; /* in case this processor had no cache */
533 
534   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
535   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
536   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
537   PetscFunctionReturn(0);
538 }
539 
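/*
   Worked example of the MPI_BOR reduction above, assuming the usual
   InsertMode values NOT_SET_VALUES = 0, INSERT_VALUES = 1, ADD_VALUES = 2:
   a process that cached nothing contributes 0 and leaves the OR unchanged,
   while a genuine mix gives 1|2 = 3 = (ADD_VALUES|INSERT_VALUES), which
   matches neither valid mode and is caught by the error check above.
*/
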
540 #undef __FUNCT__
541 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
542 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
543 {
544   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
545   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
546   PetscErrorCode ierr;
547   PetscMPIInt    n;
548   PetscInt       i,j,rstart,ncols,flg;
549   PetscInt       *row,*col;
550   PetscBool      other_disassembled;
551   PetscScalar    *val;
552   InsertMode     addv = mat->insertmode;
553 
554   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
555   PetscFunctionBegin;
556   if (!aij->donotstash && !mat->nooffprocentries) {
557     while (1) {
558       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
559       if (!flg) break;
560 
561       for (i=0; i<n;) {
562         /* Now identify the consecutive vals belonging to the same row */
563         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
564         if (j < n) ncols = j-i;
565         else       ncols = n-i;
566         /* Now assemble all these values with a single function call */
567         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
568         i = j;
569       }
570     }
571     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
572   }
573   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled; if so we must
577      also disassemble ourselves, in order that we may reassemble. */
578   /*
579      if the nonzero structure of submatrix B cannot change, then we know
580      that no process disassembled, so we can skip this step
581   */
582   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
589     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
590   }
591   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
592   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
593   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
594   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
595 
596   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
597   aij->rowvalues = 0;
598 
599   /* used by MatAXPY() */
600   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
601   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
602 
603   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
604   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
610 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
611 {
612   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
613   PetscErrorCode ierr;
614 
615   PetscFunctionBegin;
616   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
617   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
618   PetscFunctionReturn(0);
619 }
620 
621 #undef __FUNCT__
622 #define __FUNCT__ "MatZeroRows_MPIAIJ"
623 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
624 {
625   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
626   PetscErrorCode    ierr;
627   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
628   PetscInt          i,*owners = A->rmap->range;
629   PetscInt          *nprocs,j,idx,nsends,row;
630   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
631   PetscInt          *rvalues,count,base,slen,*source;
632   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
633   MPI_Comm          comm = ((PetscObject)A)->comm;
634   MPI_Request       *send_waits,*recv_waits;
635   MPI_Status        recv_status,*send_status;
636   const PetscScalar *xx;
637   PetscScalar       *bb;
638 #if defined(PETSC_DEBUG)
639   PetscBool      found = PETSC_FALSE;
640 #endif
641 
642   PetscFunctionBegin;
643   /*  first count number of contributors to each processor */
644   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
645   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
646   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
647   j = 0;
648   for (i=0; i<N; i++) {
649     if (lastidx > (idx = rows[i])) j = 0;
650     lastidx = idx;
651     for (; j<size; j++) {
652       if (idx >= owners[j] && idx < owners[j+1]) {
653         nprocs[2*j]++;
654         nprocs[2*j+1] = 1;
655         owner[i] = j;
656 #if defined(PETSC_DEBUG)
657         found = PETSC_TRUE;
658 #endif
659         break;
660       }
661     }
662 #if defined(PETSC_DEBUG)
663     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
664     found = PETSC_FALSE;
665 #endif
666   }
667   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
668 
669   if (A->nooffproczerorows) {
670     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
671     nrecvs = nsends;
672     nmax   = N;
673   } else {
674     /* inform other processors of number of messages and max length*/
675     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
676   }
677 
678   /* post receives:   */
679   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
680   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
681   for (i=0; i<nrecvs; i++) {
682     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
683   }
684 
685   /* do sends:
686       1) starts[i] gives the starting index in svalues for stuff going to
687          the ith processor
688   */
689   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
690   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
691   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
692   starts[0] = 0;
693   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
694   for (i=0; i<N; i++) {
695     svalues[starts[owner[i]]++] = rows[i];
696   }
697 
698   starts[0] = 0;
699   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
700   count = 0;
701   for (i=0; i<size; i++) {
702     if (nprocs[2*i+1]) {
703       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
704     }
705   }
706   ierr = PetscFree(starts);CHKERRQ(ierr);
707 
708   base = owners[rank];
709 
710   /*  wait on receives */
711   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
712   count  = nrecvs; slen = 0;
713   while (count) {
714     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
715     /* unpack receives into our local space */
716     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
717     source[imdex]  = recv_status.MPI_SOURCE;
718     lens[imdex]    = n;
719     slen          += n;
720     count--;
721   }
722   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
723 
724   /* move the data into the send scatter */
725   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
726   count = 0;
727   for (i=0; i<nrecvs; i++) {
728     values = rvalues + i*nmax;
729     for (j=0; j<lens[i]; j++) {
730       lrows[count++] = values[j] - base;
731     }
732   }
733   ierr = PetscFree(rvalues);CHKERRQ(ierr);
734   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
735   ierr = PetscFree(owner);CHKERRQ(ierr);
736   ierr = PetscFree(nprocs);CHKERRQ(ierr);
737 
738   /* fix right hand side if needed */
739   if (x && b) {
740     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
741     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
742     for (i=0; i<slen; i++) {
743       bb[lrows[i]] = diag*xx[lrows[i]];
744     }
745     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
746     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
747   }
748   /*
749      Zero the required rows. If the "diagonal block" of the matrix
750      is square and the user wishes to set the diagonal, we use separate
751      code so that MatSetValues() is not called for each diagonal entry;
752      that would allocate new memory on each call, triggering many mallocs
753      and slowing things down.
754   */
755   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
756   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
757   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
758     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
759   } else if (diag != 0.0) {
760     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
761     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
762       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
763 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
764     }
765     for (i = 0; i < slen; i++) {
766       row  = lrows[i] + rstart;
767       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
768     }
769     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
770     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
771   } else {
772     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
773   }
774   ierr = PetscFree(lrows);CHKERRQ(ierr);
775 
776   /* wait on sends */
777   if (nsends) {
778     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
779     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
780     ierr = PetscFree(send_status);CHKERRQ(ierr);
781   }
782   ierr = PetscFree(send_waits);CHKERRQ(ierr);
783   ierr = PetscFree(svalues);CHKERRQ(ierr);
784 
785   PetscFunctionReturn(0);
786 }
787 
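/*
   Usage sketch for the routine above: zero two (possibly off-process) rows,
   place 1.0 on their diagonals, and fix the right-hand side so those rows
   enforce the values already in x (a minimal illustration of the x,b
   arguments handled above):

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);    now b[rows[i]] = 1.0*x[rows[i]]
*/
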
788 #undef __FUNCT__
789 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
790 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
791 {
792   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
793   PetscErrorCode    ierr;
794   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
795   PetscInt          i,*owners = A->rmap->range;
796   PetscInt          *nprocs,j,idx,nsends;
797   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
798   PetscInt          *rvalues,count,base,slen,*source;
799   PetscInt          *lens,*lrows,*values,m;
800   MPI_Comm          comm = ((PetscObject)A)->comm;
801   MPI_Request       *send_waits,*recv_waits;
802   MPI_Status        recv_status,*send_status;
803   const PetscScalar *xx;
804   PetscScalar       *bb,*mask;
805   Vec               xmask,lmask;
806   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
807   const PetscInt    *aj, *ii,*ridx;
808   PetscScalar       *aa;
809 #if defined(PETSC_DEBUG)
810   PetscBool         found = PETSC_FALSE;
811 #endif
812 
813   PetscFunctionBegin;
814   /*  first count number of contributors to each processor */
815   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
816   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
817   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
818   j = 0;
819   for (i=0; i<N; i++) {
820     if (lastidx > (idx = rows[i])) j = 0;
821     lastidx = idx;
822     for (; j<size; j++) {
823       if (idx >= owners[j] && idx < owners[j+1]) {
824         nprocs[2*j]++;
825         nprocs[2*j+1] = 1;
826         owner[i] = j;
827 #if defined(PETSC_DEBUG)
828         found = PETSC_TRUE;
829 #endif
830         break;
831       }
832     }
833 #if defined(PETSC_DEBUG)
834     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
835     found = PETSC_FALSE;
836 #endif
837   }
838   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
839 
840   /* inform other processors of number of messages and max length*/
841   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
842 
843   /* post receives:   */
844   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
845   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
846   for (i=0; i<nrecvs; i++) {
847     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
848   }
849 
850   /* do sends:
851       1) starts[i] gives the starting index in svalues for stuff going to
852          the ith processor
853   */
854   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
855   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
856   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
857   starts[0] = 0;
858   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
859   for (i=0; i<N; i++) {
860     svalues[starts[owner[i]]++] = rows[i];
861   }
862 
863   starts[0] = 0;
864   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
865   count = 0;
866   for (i=0; i<size; i++) {
867     if (nprocs[2*i+1]) {
868       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
869     }
870   }
871   ierr = PetscFree(starts);CHKERRQ(ierr);
872 
873   base = owners[rank];
874 
875   /*  wait on receives */
876   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
877   count  = nrecvs; slen = 0;
878   while (count) {
879     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
880     /* unpack receives into our local space */
881     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
882     source[imdex]  = recv_status.MPI_SOURCE;
883     lens[imdex]    = n;
884     slen          += n;
885     count--;
886   }
887   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
888 
889   /* move the data into the send scatter */
890   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
891   count = 0;
892   for (i=0; i<nrecvs; i++) {
893     values = rvalues + i*nmax;
894     for (j=0; j<lens[i]; j++) {
895       lrows[count++] = values[j] - base;
896     }
897   }
898   ierr = PetscFree(rvalues);CHKERRQ(ierr);
899   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
900   ierr = PetscFree(owner);CHKERRQ(ierr);
901   ierr = PetscFree(nprocs);CHKERRQ(ierr);
902   /* lrows are the local rows to be zeroed, slen is the number of local rows */
903 
904   /* zero diagonal part of matrix */
905   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
906 
907   /* handle off diagonal part of matrix */
908   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
909   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
910   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
911   for (i=0; i<slen; i++) {
912     bb[lrows[i]] = 1;
913   }
914   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
915   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   ierr = VecDestroy(xmask);CHKERRQ(ierr);
918   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
923 
924   /* remove zeroed rows of off diagonal matrix */
925   ii = aij->i;
926   for (i=0; i<slen; i++) {
927     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
928   }
929 
930   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
931   if (aij->compressedrow.use){
932     m    = aij->compressedrow.nrows;
933     ii   = aij->compressedrow.i;
934     ridx = aij->compressedrow.rindex;
935     for (i=0; i<m; i++){
936       n   = ii[i+1] - ii[i];
937       aj  = aij->j + ii[i];
938       aa  = aij->a + ii[i];
939 
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           bb[*ridx] -= *aa*xx[*aj];
943           *aa        = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948       ridx++;
949     }
950   } else { /* do not use compressed row format */
951     m = l->B->rmap->n;
952     for (i=0; i<m; i++) {
953       n   = ii[i+1] - ii[i];
954       aj  = aij->j + ii[i];
955       aa  = aij->a + ii[i];
956       for (j=0; j<n; j++) {
957         if (PetscAbsScalar(mask[*aj])) {
958           bb[i] -= *aa*xx[*aj];
959           *aa    = 0.0;
960         }
961         aa++;
962         aj++;
963       }
964     }
965   }
966 
967   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
968   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
969   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
970   ierr = VecDestroy(lmask);CHKERRQ(ierr);
971   ierr = PetscFree(lrows);CHKERRQ(ierr);
972 
973   /* wait on sends */
974   if (nsends) {
975     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
976     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
977     ierr = PetscFree(send_status);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(send_waits);CHKERRQ(ierr);
980   ierr = PetscFree(svalues);CHKERRQ(ierr);
981 
982   PetscFunctionReturn(0);
983 }
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatMult_MPIAIJ"
987 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
988 {
989   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
990   PetscErrorCode ierr;
991   PetscInt       nt;
992 
993   PetscFunctionBegin;
994   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
995   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
996   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
997   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
998   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
999   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
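/*
   The multiply above computes, with A_d the diagonal block and B the
   off-diagonal block,

       y = A_d*x_local + B*x_ghost

   where x_ghost (a->lvec) holds the needed off-process entries of x.  The
   scatter that fills lvec is begun before the purely local product and
   completed after it, overlapping communication with computation.
*/
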
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1005 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1006 {
1007   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1017 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1018 {
1019   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1020   PetscErrorCode ierr;
1021 
1022   PetscFunctionBegin;
1023   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1024   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1025   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1032 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1033 {
1034   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1035   PetscErrorCode ierr;
1036   PetscBool      merged;
1037 
1038   PetscFunctionBegin;
1039   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1040   /* do nondiagonal part */
1041   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1042   if (!merged) {
1043     /* send it on its way */
1044     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045     /* do local part */
1046     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1047     /* receive remote parts: note this assumes the values are not actually */
1048     /* added into yy until the next line */
1049     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1050   } else {
1051     /* do local part */
1052     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1053     /* send it on its way */
1054     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1055     /* values actually were received in the Begin() but we need to call this nop */
1056     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 EXTERN_C_BEGIN
1062 #undef __FUNCT__
1063 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1064 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1065 {
1066   MPI_Comm       comm;
1067   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1068   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1069   IS             Me,Notme;
1070   PetscErrorCode ierr;
1071   PetscInt       M,N,first,last,*notme,i;
1072   PetscMPIInt    size;
1073 
1074   PetscFunctionBegin;
1075 
1076   /* Easy test: the diagonal block of B must be the transpose of the diagonal block of A */
1077   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1078   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1079   if (!*f) PetscFunctionReturn(0);
1080   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1081   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1082   if (size == 1) PetscFunctionReturn(0);
1083 
1084   /* Hard test: off-diagonal blocks. This takes a MatGetSubMatrices(). */
1085   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1086   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1087   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1088   for (i=0; i<first; i++) notme[i] = i;
1089   for (i=last; i<M; i++) notme[i-last+first] = i;
1090   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1091   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1092   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1093   Aoff = Aoffs[0];
1094   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1095   Boff = Boffs[0];
1096   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1097   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1098   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1099   ierr = ISDestroy(Me);CHKERRQ(ierr);
1100   ierr = ISDestroy(Notme);CHKERRQ(ierr);
1101 
1102   PetscFunctionReturn(0);
1103 }
1104 EXTERN_C_END
1105 
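/*
   Usage sketch: this is the MATMPIAIJ implementation reached through

     PetscBool f;
     ierr = MatIsTranspose(A,B,1.e-10,&f);CHKERRQ(ierr);

   which sets f = PETSC_TRUE when B equals the transpose of A to within the
   given tolerance.
*/
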
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1108 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   /* do nondiagonal part */
1115   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1116   /* send it on its way */
1117   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1118   /* do local part */
1119   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1120   /* receive remote parts */
1121   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 /*
1126   This only works correctly for square matrices where the subblock A->A is the
1127    diagonal block
1128 */
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1131 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1132 {
1133   PetscErrorCode ierr;
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135 
1136   PetscFunctionBegin;
1137   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1138   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1139   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatScale_MPIAIJ"
1145 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1152   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatDestroy_MPIAIJ"
1158 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1159 {
1160   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1161   PetscErrorCode ierr;
1162 
1163   PetscFunctionBegin;
1164 #if defined(PETSC_USE_LOG)
1165   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1166 #endif
1167   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1168   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
1169   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
1170   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
1171 #if defined (PETSC_USE_CTABLE)
1172   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
1173 #else
1174   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1175 #endif
1176   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1177   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
1178   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
1179   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1180   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1181   ierr = PetscFree(aij);CHKERRQ(ierr);
1182 
1183   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1191   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1197 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1198 {
1199   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1200   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1201   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1202   PetscErrorCode    ierr;
1203   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1204   int               fd;
1205   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1206   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1207   PetscScalar       *column_values;
1208   PetscInt          message_count,flowcontrolcount;
1209 
1210   PetscFunctionBegin;
1211   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1212   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1213   nz   = A->nz + B->nz;
1214   if (!rank) {
1215     header[0] = MAT_FILE_CLASSID;
1216     header[1] = mat->rmap->N;
1217     header[2] = mat->cmap->N;
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1219     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1220     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1221     /* get largest number of rows any processor has */
1222     rlen = mat->rmap->n;
1223     range = mat->rmap->range;
1224     for (i=1; i<size; i++) {
1225       rlen = PetscMax(rlen,range[i+1] - range[i]);
1226     }
1227   } else {
1228     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1229     rlen = mat->rmap->n;
1230   }
1231 
1232   /* load up the local row counts */
1233   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1234   for (i=0; i<mat->rmap->n; i++) {
1235     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1236   }
1237 
1238   /* store the row lengths to the file */
1239   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1240   if (!rank) {
1241     MPI_Status status;
1242     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1243     for (i=1; i<size; i++) {
1244       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1245       rlen = range[i+1] - range[i];
1246       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1247       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1248     }
1249     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1250   } else {
1251     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1252     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1253     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1254   }
1255   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1256 
1257   /* load up the local column indices */
1258   nzmax = nz; /* process 0 needs space as large as the largest any process needs */
1259   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1260   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1261   cnt  = 0;
1262   for (i=0; i<mat->rmap->n; i++) {
1263     for (j=B->i[i]; j<B->i[i+1]; j++) {
1264       if ( (col = garray[B->j[j]]) > cstart) break;
1265       column_indices[cnt++] = col;
1266     }
1267     for (k=A->i[i]; k<A->i[i+1]; k++) {
1268       column_indices[cnt++] = A->j[k] + cstart;
1269     }
1270     for (; j<B->i[i+1]; j++) {
1271       column_indices[cnt++] = garray[B->j[j]];
1272     }
1273   }
1274   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1275 
1276   /* store the column indices to the file */
1277   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1278   if (!rank) {
1279     MPI_Status status;
1280     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     for (i=1; i<size; i++) {
1282       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1283       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1284       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1285       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1286       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1287     }
1288     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1289   } else {
1290     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1291     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1292     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1293     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1294   }
1295   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1296 
1297   /* load up the local column values */
1298   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1299   cnt  = 0;
1300   for (i=0; i<mat->rmap->n; i++) {
1301     for (j=B->i[i]; j<B->i[i+1]; j++) {
1302       if ( garray[B->j[j]] > cstart) break;
1303       column_values[cnt++] = B->a[j];
1304     }
1305     for (k=A->i[i]; k<A->i[i+1]; k++) {
1306       column_values[cnt++] = A->a[k];
1307     }
1308     for (; j<B->i[i+1]; j++) {
1309       column_values[cnt++] = B->a[j];
1310     }
1311   }
1312   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1313 
1314   /* store the column values to the file */
1315   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1316   if (!rank) {
1317     MPI_Status status;
1318     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1319     for (i=1; i<size; i++) {
1320       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1321       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1322       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1323       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1324       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1325     }
1326     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1327   } else {
1328     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1329     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1331     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1332   }
1333   ierr = PetscFree(column_values);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1339 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1340 {
1341   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1342   PetscErrorCode    ierr;
1343   PetscMPIInt       rank = aij->rank,size = aij->size;
1344   PetscBool         isdraw,iascii,isbinary;
1345   PetscViewer       sviewer;
1346   PetscViewerFormat format;
1347 
1348   PetscFunctionBegin;
1349   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1350   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1351   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1352   if (iascii) {
1353     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1354     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1355       MatInfo    info;
1356       PetscBool  inodes;
1357 
1358       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1359       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1360       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1361       if (!inodes) {
1362         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1363                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1364       } else {
1365         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1366                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1367       }
1368       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1369       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1370       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1371       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1372       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1373       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1374       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1375       PetscFunctionReturn(0);
1376     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1377       PetscInt   inodecount,inodelimit,*inodes;
1378       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1379       if (inodes) {
1380         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1381       } else {
1382         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1383       }
1384       PetscFunctionReturn(0);
1385     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1386       PetscFunctionReturn(0);
1387     }
1388   } else if (isbinary) {
1389     if (size == 1) {
1390       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1391       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1392     } else {
1393       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1394     }
1395     PetscFunctionReturn(0);
1396   } else if (isdraw) {
1397     PetscDraw  draw;
1398     PetscBool  isnull;
1399     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1400     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1401   }
1402 
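       /* for the remaining viewers the entire matrix is assembled below onto
          process 0, as a temporary MATMPIAIJ whose rows all live on rank 0,
          and is viewed there through the singleton viewer */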
1403   if (size == 1) {
1404     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1405     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1406   } else {
1407     /* assemble the entire matrix onto first processor. */
1408     Mat         A;
1409     Mat_SeqAIJ  *Aloc;
1410     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1411     MatScalar   *a;
1412 
1413     if (mat->rmap->N > 1024) {
1414       PetscBool  flg = PETSC_FALSE;
1415 
1416       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1417       if (!flg) {
1418         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1419       }
1420     }
1421 
1422     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1423     if (!rank) {
1424       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1425     } else {
1426       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1427     }
1428     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1429     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1430     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1431     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1432 
1433     /* copy over the A part */
1434     Aloc = (Mat_SeqAIJ*)aij->A->data;
1435     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1436     row = mat->rmap->rstart;
1437     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
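         /* Aloc->j now holds global column indices for MatSetValues(); the
            loop after the insertion restores the local numbering */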
1438     for (i=0; i<m; i++) {
1439       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1440       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1441     }
1442     aj = Aloc->j;
1443     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1444 
1445     /* copy over the B part */
1446     Aloc = (Mat_SeqAIJ*)aij->B->data;
1447     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1448     row  = mat->rmap->rstart;
1449     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1450     ct   = cols;
1451     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1452     for (i=0; i<m; i++) {
1453       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1454       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1455     }
1456     ierr = PetscFree(ct);CHKERRQ(ierr);
1457     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1458     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1459     /*
1460        Everyone has to call to draw the matrix since the graphics waits are
1461        synchronized across all processors that share the PetscDraw object
1462     */
1463     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1464     if (!rank) {
1465       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1466       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1467       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1468       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1469     }
1470     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1471     ierr = MatDestroy(A);CHKERRQ(ierr);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatView_MPIAIJ"
1478 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1479 {
1480   PetscErrorCode ierr;
1481   PetscBool      iascii,isdraw,issocket,isbinary;
1482 
1483   PetscFunctionBegin;
1484   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1485   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1486   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1487   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1488   if (iascii || isdraw || isbinary || issocket) {
1489     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1490   } else {
1491     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1492   }
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatSOR_MPIAIJ"
1498 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1499 {
1500   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1501   PetscErrorCode ierr;
1502   Vec            bb1 = 0;
1503   PetscBool      hasop;
1504 
1505   PetscFunctionBegin;
1506   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1507     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1508   }
1509 
1510   if (flag == SOR_APPLY_UPPER) {
1511     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1512     PetscFunctionReturn(0);
1513   }
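       /* the SOR_LOCAL_* variants sweep only the diagonal block A: x is
          scattered into lvec, the off-diagonal contribution is folded into the
          right-hand side (bb1 = bb - B*x), and the local SOR is applied to A */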
1514 
1515   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1516     if (flag & SOR_ZERO_INITIAL_GUESS) {
1517       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1518       its--;
1519     }
1520 
1521     while (its--) {
1522       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524 
1525       /* update rhs: bb1 = bb - B*x */
1526       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1527       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1528 
1529       /* local sweep */
1530       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1531     }
1532   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1533     if (flag & SOR_ZERO_INITIAL_GUESS) {
1534       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1535       its--;
1536     }
1537     while (its--) {
1538       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1540 
1541       /* update rhs: bb1 = bb - B*x */
1542       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1543       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1544 
1545       /* local sweep */
1546       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1547     }
1548   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1549     if (flag & SOR_ZERO_INITIAL_GUESS) {
1550       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1551       its--;
1552     }
1553     while (its--) {
1554       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1556 
1557       /* update rhs: bb1 = bb - B*x */
1558       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1559       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1560 
1561       /* local sweep */
1562       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1563     }
1564   }  else if (flag & SOR_EISENSTAT) {
1565     Vec         xx1;
1566 
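         /* Eisenstat's trick: a local backward sweep from a zero guess, a
            right-hand side rescaled by the (block) diagonal, then a forward
            sweep whose correction xx1 is added to xx */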
1567     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1568     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1569 
1570     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1571     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572     if (!mat->diag) {
1573       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1574       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1575     }
1576     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1577     if (hasop) {
1578       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1579     } else {
1580       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1581     }
1582     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1583 
1584     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1585 
1586     /* local sweep */
1587     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1588     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1589     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1590   } else {
1591     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1592   }
1593 
1594   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatPermute_MPIAIJ"
1600 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1601 {
1602   MPI_Comm       comm,pcomm;
1603   PetscInt       first,local_size,nrows;
1604   const PetscInt *rows;
1605   PetscMPIInt    size;
1606   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1607   PetscErrorCode ierr;
1608 
1609   PetscFunctionBegin;
1610   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1611   /* make a collective version of 'rowp' */
1612   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1613   if (pcomm==comm) {
1614     crowp = rowp;
1615   } else {
1616     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1617     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1618     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1619     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1620   }
1621   /* collect the global row permutation and invert it */
1622   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1623   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1624   if (pcomm!=comm) {
1625     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1626   }
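       /* inverting gives, for each destination row, its source row in A,
          which is presumably the form MatGetSubMatrix_MPIAIJ_Private() below
          expects for gathering rows */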
1627   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1628   /* get the local target indices */
1629   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1630   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1631   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1632   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1633   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1634   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1635   /* the column permutation is so much easier;
1636      make a local version of 'colp' and invert it */
1637   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1638   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1639   if (size==1) {
1640     lcolp = colp;
1641   } else {
1642     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1643     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1644     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1645   }
1646   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1647   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1648   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1649   if (size>1) {
1650     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1651     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1652   }
1653   /* now we just get the submatrix */
1654   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1655   /* clean up */
1656   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1657   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1663 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1664 {
1665   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1666   Mat            A = mat->A,B = mat->B;
1667   PetscErrorCode ierr;
1668   PetscReal      isend[5],irecv[5];
1669 
1670   PetscFunctionBegin;
1671   info->block_size     = 1.0;
1672   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1673   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1674   isend[3] = info->memory;  isend[4] = info->mallocs;
1675   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1676   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1677   isend[3] += info->memory;  isend[4] += info->mallocs;
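       /* the five statistics of A and B are accumulated into isend[] so that a
          single MPI_Allreduce (MAX or SUM) yields the global values */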
1678   if (flag == MAT_LOCAL) {
1679     info->nz_used      = isend[0];
1680     info->nz_allocated = isend[1];
1681     info->nz_unneeded  = isend[2];
1682     info->memory       = isend[3];
1683     info->mallocs      = isend[4];
1684   } else if (flag == MAT_GLOBAL_MAX) {
1685     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1686     info->nz_used      = irecv[0];
1687     info->nz_allocated = irecv[1];
1688     info->nz_unneeded  = irecv[2];
1689     info->memory       = irecv[3];
1690     info->mallocs      = irecv[4];
1691   } else if (flag == MAT_GLOBAL_SUM) {
1692     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1693     info->nz_used      = irecv[0];
1694     info->nz_allocated = irecv[1];
1695     info->nz_unneeded  = irecv[2];
1696     info->memory       = irecv[3];
1697     info->mallocs      = irecv[4];
1698   }
1699   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1700   info->fill_ratio_needed = 0;
1701   info->factor_mallocs    = 0;
1702 
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatSetOption_MPIAIJ"
1708 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1709 {
1710   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1711   PetscErrorCode ierr;
1712 
1713   PetscFunctionBegin;
1714   switch (op) {
1715   case MAT_NEW_NONZERO_LOCATIONS:
1716   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1717   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1718   case MAT_KEEP_NONZERO_PATTERN:
1719   case MAT_NEW_NONZERO_LOCATION_ERR:
1720   case MAT_USE_INODES:
1721   case MAT_IGNORE_ZERO_ENTRIES:
1722     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1723     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1724     break;
1725   case MAT_ROW_ORIENTED:
1726     a->roworiented = flg;
1727     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1728     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1729     break;
1730   case MAT_NEW_DIAGONALS:
1731     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1732     break;
1733   case MAT_IGNORE_OFF_PROC_ENTRIES:
1734     a->donotstash = PETSC_TRUE;
1735     break;
1736   case MAT_SPD:
1737     A->spd_set                         = PETSC_TRUE;
1738     A->spd                             = flg;
1739     if (flg) {
1740       A->symmetric                     = PETSC_TRUE;
1741       A->structurally_symmetric        = PETSC_TRUE;
1742       A->symmetric_set                 = PETSC_TRUE;
1743       A->structurally_symmetric_set    = PETSC_TRUE;
1744     }
1745     break;
1746   case MAT_SYMMETRIC:
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748     break;
1749   case MAT_STRUCTURALLY_SYMMETRIC:
1750     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1751     break;
1752   case MAT_HERMITIAN:
1753     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1754     break;
1755   case MAT_SYMMETRY_ETERNAL:
1756     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1757     break;
1758   default:
1759     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1760   }
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatGetRow_MPIAIJ"
1766 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1767 {
1768   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1769   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1770   PetscErrorCode ierr;
1771   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1772   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1773   PetscInt       *cmap,*idx_p;
1774 
1775   PetscFunctionBegin;
1776   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1777   mat->getrowactive = PETSC_TRUE;
1778 
1779   if (!mat->rowvalues && (idx || v)) {
1780     /*
1781         allocate enough space to hold information from the longest row.
1782     */
1783     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1784     PetscInt   max = 1,tmp;
1785     for (i=0; i<matin->rmap->n; i++) {
1786       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1787       if (max < tmp) { max = tmp; }
1788     }
1789     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1790   }
1791 
1792   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1793   lrow = row - rstart;
1794 
1795   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1796   if (!v)   {pvA = 0; pvB = 0;}
1797   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1798   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1799   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1800   nztot = nzA + nzB;
1801 
1802   cmap  = mat->garray;
1803   if (v  || idx) {
1804     if (nztot) {
1805       /* Sort by increasing column numbers, assuming A and B already sorted */
1806       PetscInt imark = -1;
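           /* imark counts the B (off-diagonal) columns that lie left of the
              diagonal block; B's entries are split around the A block so the
              merged row stays in ascending global column order */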
1807       if (v) {
1808         *v = v_p = mat->rowvalues;
1809         for (i=0; i<nzB; i++) {
1810           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1811           else break;
1812         }
1813         imark = i;
1814         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1815         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1816       }
1817       if (idx) {
1818         *idx = idx_p = mat->rowindices;
1819         if (imark > -1) {
1820           for (i=0; i<imark; i++) {
1821             idx_p[i] = cmap[cworkB[i]];
1822           }
1823         } else {
1824           for (i=0; i<nzB; i++) {
1825             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1826             else break;
1827           }
1828           imark = i;
1829         }
1830         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1831         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1832       }
1833     } else {
1834       if (idx) *idx = 0;
1835       if (v)   *v   = 0;
1836     }
1837   }
1838   *nz = nztot;
1839   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1840   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1846 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1847 {
1848   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1849 
1850   PetscFunctionBegin;
1851   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1852   aij->getrowactive = PETSC_FALSE;
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatNorm_MPIAIJ"
1858 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1859 {
1860   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1861   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1862   PetscErrorCode ierr;
1863   PetscInt       i,j,cstart = mat->cmap->rstart;
1864   PetscReal      sum = 0.0;
1865   MatScalar      *v;
1866 
1867   PetscFunctionBegin;
1868   if (aij->size == 1) {
1869     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1870   } else {
1871     if (type == NORM_FROBENIUS) {
1872       v = amat->a;
1873       for (i=0; i<amat->nz; i++) {
1874 #if defined(PETSC_USE_COMPLEX)
1875         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1876 #else
1877         sum += (*v)*(*v); v++;
1878 #endif
1879       }
1880       v = bmat->a;
1881       for (i=0; i<bmat->nz; i++) {
1882 #if defined(PETSC_USE_COMPLEX)
1883         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1884 #else
1885         sum += (*v)*(*v); v++;
1886 #endif
1887       }
1888       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1889       *norm = PetscSqrtReal(*norm);
1890     } else if (type == NORM_1) { /* max column norm */
1891       PetscReal *tmp,*tmp2;
1892       PetscInt  *jj,*garray = aij->garray;
1893       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1894       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1895       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
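           /* accumulate |a_ij| into per-column sums, MPI_SUM-reduce them into
              tmp2 (a separate receive buffer), and take the largest column sum */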
1896       *norm = 0.0;
1897       v = amat->a; jj = amat->j;
1898       for (j=0; j<amat->nz; j++) {
1899         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1900       }
1901       v = bmat->a; jj = bmat->j;
1902       for (j=0; j<bmat->nz; j++) {
1903         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1904       }
1905       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1906       for (j=0; j<mat->cmap->N; j++) {
1907         if (tmp2[j] > *norm) *norm = tmp2[j];
1908       }
1909       ierr = PetscFree(tmp);CHKERRQ(ierr);
1910       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1911     } else if (type == NORM_INFINITY) { /* max row norm */
1912       PetscReal ntemp = 0.0;
1913       for (j=0; j<aij->A->rmap->n; j++) {
1914         v = amat->a + amat->i[j];
1915         sum = 0.0;
1916         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1917           sum += PetscAbsScalar(*v); v++;
1918         }
1919         v = bmat->a + bmat->i[j];
1920         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1921           sum += PetscAbsScalar(*v); v++;
1922         }
1923         if (sum > ntemp) ntemp = sum;
1924       }
1925       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1926     } else {
1927       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1928     }
1929   }
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatTranspose_MPIAIJ"
1935 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1936 {
1937   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1938   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1939   PetscErrorCode ierr;
1940   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1941   PetscInt       cstart=A->cmap->rstart,ncol;
1942   Mat            B;
1943   MatScalar      *array;
1944 
1945   PetscFunctionBegin;
1946   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1947 
1948   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1949   ai = Aloc->i; aj = Aloc->j;
1950   bi = Bloc->i; bj = Bloc->j;
1951   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1952     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1953     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1954     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1955     for (i=0; i<ai[ma]; i++){
1956       d_nnz[aj[i]] ++;
1957       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1958     }
1959 
1960     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1961     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1962     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1963     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1964     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1965   } else {
1966     B = *matout;
1967   }
1968 
1969   /* copy over the A part */
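       /* each local row of A becomes a column of B: MatSetValues() is called
          with ncol row indices (the shifted aj) and the single column index
          'row' */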
1970   array = Aloc->a;
1971   row = A->rmap->rstart;
1972   for (i=0; i<ma; i++) {
1973     ncol = ai[i+1]-ai[i];
1974     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1975     row++; array += ncol; aj += ncol;
1976   }
1977   aj = Aloc->j;
1978   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1979 
1980   /* copy over the B part */
1981   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1982   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1983   array = Bloc->a;
1984   row = A->rmap->rstart;
1985   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1986   cols_tmp = cols;
1987   for (i=0; i<mb; i++) {
1988     ncol = bi[i+1]-bi[i];
1989     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1990     row++; array += ncol; cols_tmp += ncol;
1991   }
1992   ierr = PetscFree(cols);CHKERRQ(ierr);
1993 
1994   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1995   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1996   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1997     *matout = B;
1998   } else {
1999     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2000   }
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2006 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2007 {
2008   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2009   Mat            a = aij->A,b = aij->B;
2010   PetscErrorCode ierr;
2011   PetscInt       s1,s2,s3;
2012 
2013   PetscFunctionBegin;
2014   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2015   if (rr) {
2016     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2017     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2018     /* Overlap communication with computation. */
2019     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2020   }
2021   if (ll) {
2022     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2023     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2024     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2025   }
2026   /* scale  the diagonal block */
2027   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2028 
2029   if (rr) {
2030     /* Do a scatter end and then right scale the off-diagonal block */
2031     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2032     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2033   }
2034 
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2040 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2041 {
2042   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2043   PetscErrorCode ierr;
2044 
2045   PetscFunctionBegin;
2046   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2047   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2048   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2049   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2054 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2055 {
2056   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2057   PetscErrorCode ierr;
2058 
2059   PetscFunctionBegin;
2060   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatEqual_MPIAIJ"
2066 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2067 {
2068   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2069   Mat            a,b,c,d;
2070   PetscBool      flg;
2071   PetscErrorCode ierr;
2072 
2073   PetscFunctionBegin;
2074   a = matA->A; b = matA->B;
2075   c = matB->A; d = matB->B;
2076 
2077   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2078   if (flg) {
2079     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2080   }
2081   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatCopy_MPIAIJ"
2087 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2088 {
2089   PetscErrorCode ierr;
2090   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2091   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2092 
2093   PetscFunctionBegin;
2094   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2095   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2096     /* Because of the column compression in the off-processor part of the matrix a->B,
2097        the number of columns in a->B and b->B may be different, hence we cannot call
2098        MatCopy() directly on the two parts. If need be, a copy more efficient than
2099        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2100        and then copying the submatrices */
2101     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2102   } else {
2103     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2104     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2105   }
2106   PetscFunctionReturn(0);
2107 }
2108 
2109 #undef __FUNCT__
2110 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2111 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2112 {
2113   PetscErrorCode ierr;
2114 
2115   PetscFunctionBegin;
2116   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 #undef __FUNCT__
2121 #define __FUNCT__ "MatAXPY_MPIAIJ"
2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2123 {
2124   PetscErrorCode ierr;
2125   PetscInt       i;
2126   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2127   PetscBLASInt   bnz,one=1;
2128   Mat_SeqAIJ     *x,*y;
2129 
2130   PetscFunctionBegin;
2131   if (str == SAME_NONZERO_PATTERN) {
2132     PetscScalar alpha = a;
2133     x = (Mat_SeqAIJ *)xx->A->data;
2134     y = (Mat_SeqAIJ *)yy->A->data;
2135     bnz = PetscBLASIntCast(x->nz);
2136     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2137     x = (Mat_SeqAIJ *)xx->B->data;
2138     y = (Mat_SeqAIJ *)yy->B->data;
2139     bnz = PetscBLASIntCast(x->nz);
2140     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2141   } else if (str == SUBSET_NONZERO_PATTERN) {
2142     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2143 
2144     x = (Mat_SeqAIJ *)xx->B->data;
2145     y = (Mat_SeqAIJ *)yy->B->data;
2146     if (y->xtoy && y->XtoY != xx->B) {
2147       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2148       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2149     }
2150     if (!y->xtoy) { /* get xtoy */
2151       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2152       y->XtoY = xx->B;
2153       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2154     }
2155     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2156   } else {
2157     Mat B;
2158     PetscInt *nnz_d,*nnz_o;
2159     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2160     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2161     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2162     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2163     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2164     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2165     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2166     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2167     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2168     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2169     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2170     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2171     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2172   }
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2177 
2178 #undef __FUNCT__
2179 #define __FUNCT__ "MatConjugate_MPIAIJ"
2180 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2181 {
2182 #if defined(PETSC_USE_COMPLEX)
2183   PetscErrorCode ierr;
2184   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2185 
2186   PetscFunctionBegin;
2187   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2188   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2189 #else
2190   PetscFunctionBegin;
2191 #endif
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatRealPart_MPIAIJ"
2197 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2198 {
2199   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2200   PetscErrorCode ierr;
2201 
2202   PetscFunctionBegin;
2203   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2204   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2210 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2211 {
2212   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2213   PetscErrorCode ierr;
2214 
2215   PetscFunctionBegin;
2216   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2217   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #ifdef PETSC_HAVE_PBGL
2222 
2223 #include <boost/parallel/mpi/bsp_process_group.hpp>
2224 #include <boost/graph/distributed/ilu_default_graph.hpp>
2225 #include <boost/graph/distributed/ilu_0_block.hpp>
2226 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2227 #include <boost/graph/distributed/petsc/interface.hpp>
2228 #include <boost/multi_array.hpp>
2229 #include <boost/parallel/distributed_property_map.hpp>
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2233 /*
2234   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2235 */
2236 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2237 {
2238   namespace petsc = boost::distributed::petsc;
2239 
2240   namespace graph_dist = boost::graph::distributed;
2241   using boost::graph::distributed::ilu_default::process_group_type;
2242   using boost::graph::ilu_permuted;
2243 
2244   PetscBool       row_identity, col_identity;
2245   PetscContainer  c;
2246   PetscInt        m, n, M, N;
2247   PetscErrorCode  ierr;
2248 
2249   PetscFunctionBegin;
2250   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2251   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2252   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2253   if (!row_identity || !col_identity) {
2254     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2255   }
2256 
2257   process_group_type pg;
2258   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2259   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2260   lgraph_type&   level_graph = *lgraph_p;
2261   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2262 
2263   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2264   ilu_permuted(level_graph);
2265 
2266   /* put together the new matrix */
2267   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2268   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2269   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2270   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2271   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2272   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274 
2275   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2276   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2277   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2283 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2284 {
2285   PetscFunctionBegin;
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #undef __FUNCT__
2290 #define __FUNCT__ "MatSolve_MPIAIJ"
2291 /*
2292   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2293 */
2294 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2295 {
2296   namespace graph_dist = boost::graph::distributed;
2297 
2298   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2299   lgraph_type*   lgraph_p;
2300   PetscContainer c;
2301   PetscErrorCode ierr;
2302 
2303   PetscFunctionBegin;
2304   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2305   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2306   ierr = VecCopy(b, x);CHKERRQ(ierr);
2307 
2308   PetscScalar* array_x;
2309   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2310   PetscInt sx;
2311   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2312 
2313   PetscScalar* array_b;
2314   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2315   PetscInt sb;
2316   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2317 
2318   lgraph_type&   level_graph = *lgraph_p;
2319   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2320 
2321   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2322   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2323                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2324 
2325   typedef boost::iterator_property_map<array_ref_type::iterator,
2326                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2327   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2328                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2329 
2330   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2331 
2332   PetscFunctionReturn(0);
2333 }
2334 #endif
2335 
2336 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2337   PetscInt       nzlocal,nsends,nrecvs;
2338   PetscMPIInt    *send_rank,*recv_rank;
2339   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2340   PetscScalar    *sbuf_a,**rbuf_a;
2341   PetscErrorCode (*MatDestroy)(Mat);
2342 } Mat_Redundant;
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2346 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2347 {
2348   PetscErrorCode       ierr;
2349   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2350   PetscInt             i;
2351 
2352   PetscFunctionBegin;
2353   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2354   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2355   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2356   for (i=0; i<redund->nrecvs; i++){
2357     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2358     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2359   }
2360   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2361   ierr = PetscFree(redund);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatDestroy_MatRedundant"
2367 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2368 {
2369   PetscErrorCode  ierr;
2370   PetscContainer  container;
2371   Mat_Redundant   *redund=PETSC_NULL;
2372 
2373   PetscFunctionBegin;
2374   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2375   if (container) {
2376     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2377   } else {
2378     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2379   }
2380   A->ops->destroy = redund->MatDestroy;
2381   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2382   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2383   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2389 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2390 {
2391   PetscMPIInt    rank,size;
2392   MPI_Comm       comm=((PetscObject)mat)->comm;
2393   PetscErrorCode ierr;
2394   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2395   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2396   PetscInt       *rowrange=mat->rmap->range;
2397   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2398   Mat            A=aij->A,B=aij->B,C=*matredundant;
2399   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2400   PetscScalar    *sbuf_a;
2401   PetscInt       nzlocal=a->nz+b->nz;
2402   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2403   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2404   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2405   MatScalar      *aworkA,*aworkB;
2406   PetscScalar    *vals;
2407   PetscMPIInt    tag1,tag2,tag3,imdex;
2408   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2409                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2410   MPI_Status     recv_status,*send_status;
2411   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2412   PetscInt       **rbuf_j=PETSC_NULL;
2413   PetscScalar    **rbuf_a=PETSC_NULL;
2414   Mat_Redundant  *redund=PETSC_NULL;
2415   PetscContainer container;
2416 
2417   PetscFunctionBegin;
2418   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2419   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2420 
2421   if (reuse == MAT_REUSE_MATRIX) {
2422     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2423     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2424     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2425     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2426     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2427     if (container) {
2428       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2429     } else {
2430       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2431     }
2432     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2433 
2434     nsends    = redund->nsends;
2435     nrecvs    = redund->nrecvs;
2436     send_rank = redund->send_rank;
2437     recv_rank = redund->recv_rank;
2438     sbuf_nz   = redund->sbuf_nz;
2439     rbuf_nz   = redund->rbuf_nz;
2440     sbuf_j    = redund->sbuf_j;
2441     sbuf_a    = redund->sbuf_a;
2442     rbuf_j    = redund->rbuf_j;
2443     rbuf_a    = redund->rbuf_a;
2444   }
2445 
2446   if (reuse == MAT_INITIAL_MATRIX){
2447     PetscMPIInt  subrank,subsize;
2448     PetscInt     nleftover,np_subcomm;
2449     /* get the destination processors' id send_rank, nsends and nrecvs */
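         /* each process exchanges its row block with the processes having the
            same subrank in the other subcommunicators (the ranks i with
            i/nsubcomm == subrank), so every subcommunicator assembles a full
            copy of the matrix; leftover ranks (size % nsubcomm != 0) only send */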
2450     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2451     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2452     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2453     np_subcomm = size/nsubcomm;
2454     nleftover  = size - nsubcomm*np_subcomm;
2455     nsends = 0; nrecvs = 0;
2456     for (i=0; i<size; i++){ /* i=rank*/
2457       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2458         send_rank[nsends] = i; nsends++;
2459         recv_rank[nrecvs++] = i;
2460       }
2461     }
2462     if (rank >= size - nleftover){/* this proc is a leftover processor */
2463       i = size-nleftover-1;
2464       j = 0;
2465       while (j < nsubcomm - nleftover){
2466         send_rank[nsends++] = i;
2467         i--; j++;
2468       }
2469     }
2470 
2471     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2472       for (i=0; i<nleftover; i++){
2473         recv_rank[nrecvs++] = size-nleftover+i;
2474       }
2475     }
2476 
2477     /* allocate sbuf_j, sbuf_a */
2478     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2479     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2480     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2481   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2482 
2483   /* copy mat's local entries into the buffers */
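       /* buffer layout: sbuf_j = [row pointers (rend-rstart+1) | column
          indices], with sbuf_a holding the values in matching order; each row
          is written as lower off-diagonal, diagonal block, upper off-diagonal
          so its columns are globally ascending */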
2484   if (reuse == MAT_INITIAL_MATRIX){
2485     rownz_max = 0;
2486     rptr = sbuf_j;
2487     cols = sbuf_j + rend-rstart + 1;
2488     vals = sbuf_a;
2489     rptr[0] = 0;
2490     for (i=0; i<rend-rstart; i++){
2491       row = i + rstart;
2492       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2493       ncols  = nzA + nzB;
2494       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2495       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2496       /* load the column indices for this row into cols */
2497       lwrite = 0;
2498       for (l=0; l<nzB; l++) {
2499         if ((ctmp = bmap[cworkB[l]]) < cstart){
2500           vals[lwrite]   = aworkB[l];
2501           cols[lwrite++] = ctmp;
2502         }
2503       }
2504       for (l=0; l<nzA; l++){
2505         vals[lwrite]   = aworkA[l];
2506         cols[lwrite++] = cstart + cworkA[l];
2507       }
2508       for (l=0; l<nzB; l++) {
2509         if ((ctmp = bmap[cworkB[l]]) >= cend){
2510           vals[lwrite]   = aworkB[l];
2511           cols[lwrite++] = ctmp;
2512         }
2513       }
2514       vals += ncols;
2515       cols += ncols;
2516       rptr[i+1] = rptr[i] + ncols;
2517       if (rownz_max < ncols) rownz_max = ncols;
2518     }
2519     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2520   } else { /* only copy matrix values into sbuf_a */
2521     rptr = sbuf_j;
2522     vals = sbuf_a;
2523     rptr[0] = 0;
2524     for (i=0; i<rend-rstart; i++){
2525       row = i + rstart;
2526       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2527       ncols  = nzA + nzB;
2528       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2529       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2530       lwrite = 0;
2531       for (l=0; l<nzB; l++) {
2532         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2533       }
2534       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2535       for (l=0; l<nzB; l++) {
2536         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2537       }
2538       vals += ncols;
2539       rptr[i+1] = rptr[i] + ncols;
2540     }
2541   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2542 
2543   /* send nzlocal to others, and recv other's nzlocal */
2544   /*--------------------------------------------------*/
2545   if (reuse == MAT_INITIAL_MATRIX){
2546     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2547     s_waits2 = s_waits3 + nsends;
2548     s_waits1 = s_waits2 + nsends;
2549     r_waits1 = s_waits1 + nsends;
2550     r_waits2 = r_waits1 + nrecvs;
2551     r_waits3 = r_waits2 + nrecvs;
2552   } else {
2553     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2554     r_waits3 = s_waits3 + nsends;
2555   }
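       /* three-tag protocol: tag1 carries the nonzero counts, tag2 the row
          pointer + column index buffer, tag3 the values; on MAT_REUSE_MATRIX
          only the tag3 value exchange is repeated */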
2556 
2557   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2558   if (reuse == MAT_INITIAL_MATRIX){
2559     /* get new tags to keep the communication clean */
2560     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2561     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2562     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2563 
2564     /* post receives of other's nzlocal */
2565     for (i=0; i<nrecvs; i++){
2566       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2567     }
2568     /* send nzlocal to others */
2569     for (i=0; i<nsends; i++){
2570       sbuf_nz[i] = nzlocal;
2571       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2572     }
2573     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2574     count = nrecvs;
2575     while (count) {
2576       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2577       recv_rank[imdex] = recv_status.MPI_SOURCE;
2578       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2579       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2580 
2581       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2582       rbuf_nz[imdex] += i + 2;
2583       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2584       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2585       count--;
2586     }
2587     /* wait on sends of nzlocal */
2588     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2589     /* send mat->i,j to others, and recv from other's */
2590     /*------------------------------------------------*/
2591     for (i=0; i<nsends; i++){
2592       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2593       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2594     }
2595     /* wait on receives of mat->i,j */
2596     /*------------------------------*/
2597     count = nrecvs;
2598     while (count) {
2599       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2600       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2601       count--;
2602     }
2603     /* wait on sends of mat->i,j */
2604     /*---------------------------*/
2605     if (nsends) {
2606       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2607     }
2608   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2609 
2610   /* post receives, send and receive mat->a */
2611   /*----------------------------------------*/
2612   for (imdex=0; imdex<nrecvs; imdex++) {
2613     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2614   }
2615   for (i=0; i<nsends; i++){
2616     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2617   }
2618   count = nrecvs;
2619   while (count) {
2620     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2621     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2622     count--;
2623   }
2624   if (nsends) {
2625     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2626   }
2627 
2628   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2629 
2630   /* create redundant matrix */
2631   /*-------------------------*/
2632   if (reuse == MAT_INITIAL_MATRIX){
2633     /* compute rownz_max for preallocation */
2634     for (imdex=0; imdex<nrecvs; imdex++){
2635       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2636       rptr = rbuf_j[imdex];
2637       for (i=0; i<j; i++){
2638         ncols = rptr[i+1] - rptr[i];
2639         if (rownz_max < ncols) rownz_max = ncols;
2640       }
2641     }
2642 
2643     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2644     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2645     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2646     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2647     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2648   } else {
2649     C = *matredundant;
2650   }
2651 
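  /* sbuf_j is packed as [row pointers (rend-rstart+1 entries) | column indices];
     sbuf_a holds the values in the same order as the packed column indices.
     The received rbuf_j[]/rbuf_a[] buffers below use the same packing. */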
2652   /* insert local matrix entries */
2653   rptr = sbuf_j;
2654   cols = sbuf_j + rend-rstart + 1;
2655   vals = sbuf_a;
2656   for (i=0; i<rend-rstart; i++){
2657     row   = i + rstart;
2658     ncols = rptr[i+1] - rptr[i];
2659     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2660     vals += ncols;
2661     cols += ncols;
2662   }
2663   /* insert received matrix entries */
2664   for (imdex=0; imdex<nrecvs; imdex++){
2665     rstart = rowrange[recv_rank[imdex]];
2666     rend   = rowrange[recv_rank[imdex]+1];
2667     rptr = rbuf_j[imdex];
2668     cols = rbuf_j[imdex] + rend-rstart + 1;
2669     vals = rbuf_a[imdex];
2670     for (i=0; i<rend-rstart; i++){
2671       row   = i + rstart;
2672       ncols = rptr[i+1] - rptr[i];
2673       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2674       vals += ncols;
2675       cols += ncols;
2676     }
2677   }
2678   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2679   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2680   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2681   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2682   if (reuse == MAT_INITIAL_MATRIX){
2683     PetscContainer container;
2684     *matredundant = C;
2685     /* create a supporting struct and attach it to C for reuse */
2686     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2687     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2688     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2689     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2690     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2691 
2692     redund->nzlocal = nzlocal;
2693     redund->nsends  = nsends;
2694     redund->nrecvs  = nrecvs;
2695     redund->send_rank = send_rank;
2696     redund->recv_rank = recv_rank;
2697     redund->sbuf_nz = sbuf_nz;
2698     redund->rbuf_nz = rbuf_nz;
2699     redund->sbuf_j  = sbuf_j;
2700     redund->sbuf_a  = sbuf_a;
2701     redund->rbuf_j  = rbuf_j;
2702     redund->rbuf_a  = rbuf_a;
2703 
2704     redund->MatDestroy = C->ops->destroy;
2705     C->ops->destroy    = MatDestroy_MatRedundant;
2706   }
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2712 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2713 {
2714   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2715   PetscErrorCode ierr;
2716   PetscInt       i,*idxb = 0;
2717   PetscScalar    *va,*vb;
2718   Vec            vtmp;
2719 
2720   PetscFunctionBegin;
2721   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2722   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2723   if (idx) {
2724     for (i=0; i<A->rmap->n; i++) {
2725       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2726     }
2727   }
2728 
2729   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2730   if (idx) {
2731     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2732   }
2733   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2734   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2735 
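  /* merge: keep whichever of the diagonal-block and off-diagonal-block row
     maxima has the larger magnitude; garray translates the off-diagonal
     block's local column numbers back to global column numbers */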
2736   for (i=0; i<A->rmap->n; i++){
2737     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2738       va[i] = vb[i];
2739       if (idx) idx[i] = a->garray[idxb[i]];
2740     }
2741   }
2742 
2743   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2744   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2745   if (idxb) {
2746     ierr = PetscFree(idxb);CHKERRQ(ierr);
2747   }
2748   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 #undef __FUNCT__
2753 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2754 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2755 {
2756   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2757   PetscErrorCode ierr;
2758   PetscInt       i,*idxb = 0;
2759   PetscScalar    *va,*vb;
2760   Vec            vtmp;
2761 
2762   PetscFunctionBegin;
2763   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2764   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2765   if (idx) {
2766     for (i=0; i<A->rmap->n; i++) {
2767       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2768     }
2769   }
2770 
2771   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2772   if (idx) {
2773     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2774   }
2775   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2776   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2777 
2778   for (i=0; i<A->rmap->n; i++){
2779     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2780       va[i] = vb[i];
2781       if (idx) idx[i] = a->garray[idxb[i]];
2782     }
2783   }
2784 
2785   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2786   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2787   if (idxb) {
2788     ierr = PetscFree(idxb);CHKERRQ(ierr);
2789   }
2790   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2796 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2797 {
2798   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2799   PetscInt       n      = A->rmap->n;
2800   PetscInt       cstart = A->cmap->rstart;
2801   PetscInt      *cmap   = mat->garray;
2802   PetscInt      *diagIdx, *offdiagIdx;
2803   Vec            diagV, offdiagV;
2804   PetscScalar   *a, *diagA, *offdiagA;
2805   PetscInt       r;
2806   PetscErrorCode ierr;
2807 
2808   PetscFunctionBegin;
2809   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2810   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2811   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2812   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2813   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2814   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2815   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2816   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2817   for(r = 0; r < n; ++r) {
2818     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2819       a[r]   = diagA[r];
2820       idx[r] = cstart + diagIdx[r];
2821     } else {
2822       a[r]   = offdiagA[r];
2823       idx[r] = cmap[offdiagIdx[r]];
2824     }
2825   }
2826   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2827   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2828   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2829   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2830   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2831   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 #undef __FUNCT__
2836 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2837 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2838 {
2839   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2840   PetscInt       n      = A->rmap->n;
2841   PetscInt       cstart = A->cmap->rstart;
2842   PetscInt      *cmap   = mat->garray;
2843   PetscInt      *diagIdx, *offdiagIdx;
2844   Vec            diagV, offdiagV;
2845   PetscScalar   *a, *diagA, *offdiagA;
2846   PetscInt       r;
2847   PetscErrorCode ierr;
2848 
2849   PetscFunctionBegin;
2850   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2851   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2852   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2853   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2854   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2855   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2856   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2857   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2858   for(r = 0; r < n; ++r) {
2859     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2860       a[r]   = diagA[r];
2861       idx[r] = cstart + diagIdx[r];
2862     } else {
2863       a[r]   = offdiagA[r];
2864       idx[r] = cmap[offdiagIdx[r]];
2865     }
2866   }
2867   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2868   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2869   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2870   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2871   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2872   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2873   PetscFunctionReturn(0);
2874 }
2875 
2876 #undef __FUNCT__
2877 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2878 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2879 {
2880   PetscErrorCode ierr;
2881   Mat            *dummy;
2882 
2883   PetscFunctionBegin;
2884   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2885   *newmat = *dummy;
2886   ierr = PetscFree(dummy);CHKERRQ(ierr);
2887   PetscFunctionReturn(0);
2888 }
2889 
2890 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2891 /* -------------------------------------------------------------------*/
2892 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2893        MatGetRow_MPIAIJ,
2894        MatRestoreRow_MPIAIJ,
2895        MatMult_MPIAIJ,
2896 /* 4*/ MatMultAdd_MPIAIJ,
2897        MatMultTranspose_MPIAIJ,
2898        MatMultTransposeAdd_MPIAIJ,
2899 #ifdef PETSC_HAVE_PBGL
2900        MatSolve_MPIAIJ,
2901 #else
2902        0,
2903 #endif
2904        0,
2905        0,
2906 /*10*/ 0,
2907        0,
2908        0,
2909        MatSOR_MPIAIJ,
2910        MatTranspose_MPIAIJ,
2911 /*15*/ MatGetInfo_MPIAIJ,
2912        MatEqual_MPIAIJ,
2913        MatGetDiagonal_MPIAIJ,
2914        MatDiagonalScale_MPIAIJ,
2915        MatNorm_MPIAIJ,
2916 /*20*/ MatAssemblyBegin_MPIAIJ,
2917        MatAssemblyEnd_MPIAIJ,
2918        MatSetOption_MPIAIJ,
2919        MatZeroEntries_MPIAIJ,
2920 /*24*/ MatZeroRows_MPIAIJ,
2921        0,
2922 #ifdef PETSC_HAVE_PBGL
2923        0,
2924 #else
2925        0,
2926 #endif
2927        0,
2928        0,
2929 /*29*/ MatSetUpPreallocation_MPIAIJ,
2930 #ifdef PETSC_HAVE_PBGL
2931        0,
2932 #else
2933        0,
2934 #endif
2935        0,
2936        0,
2937        0,
2938 /*34*/ MatDuplicate_MPIAIJ,
2939        0,
2940        0,
2941        0,
2942        0,
2943 /*39*/ MatAXPY_MPIAIJ,
2944        MatGetSubMatrices_MPIAIJ,
2945        MatIncreaseOverlap_MPIAIJ,
2946        MatGetValues_MPIAIJ,
2947        MatCopy_MPIAIJ,
2948 /*44*/ MatGetRowMax_MPIAIJ,
2949        MatScale_MPIAIJ,
2950        0,
2951        0,
2952        MatZeroRowsColumns_MPIAIJ,
2953 /*49*/ MatSetBlockSize_MPIAIJ,
2954        0,
2955        0,
2956        0,
2957        0,
2958 /*54*/ MatFDColoringCreate_MPIAIJ,
2959        0,
2960        MatSetUnfactored_MPIAIJ,
2961        MatPermute_MPIAIJ,
2962        0,
2963 /*59*/ MatGetSubMatrix_MPIAIJ,
2964        MatDestroy_MPIAIJ,
2965        MatView_MPIAIJ,
2966        0,
2967        0,
2968 /*64*/ 0,
2969        0,
2970        0,
2971        0,
2972        0,
2973 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2974        MatGetRowMinAbs_MPIAIJ,
2975        0,
2976        MatSetColoring_MPIAIJ,
2977 #if defined(PETSC_HAVE_ADIC)
2978        MatSetValuesAdic_MPIAIJ,
2979 #else
2980        0,
2981 #endif
2982        MatSetValuesAdifor_MPIAIJ,
2983 /*75*/ MatFDColoringApply_AIJ,
2984        0,
2985        0,
2986        0,
2987        0,
2988 /*80*/ 0,
2989        0,
2990        0,
2991 /*83*/ MatLoad_MPIAIJ,
2992        0,
2993        0,
2994        0,
2995        0,
2996        0,
2997 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2998        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2999        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3000        MatPtAP_Basic,
3001        MatPtAPSymbolic_MPIAIJ,
3002 /*94*/ MatPtAPNumeric_MPIAIJ,
3003        0,
3004        0,
3005        0,
3006        0,
3007 /*99*/ 0,
3008        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3009        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3010        MatConjugate_MPIAIJ,
3011        0,
3012 /*104*/MatSetValuesRow_MPIAIJ,
3013        MatRealPart_MPIAIJ,
3014        MatImaginaryPart_MPIAIJ,
3015        0,
3016        0,
3017 /*109*/0,
3018        MatGetRedundantMatrix_MPIAIJ,
3019        MatGetRowMin_MPIAIJ,
3020        0,
3021        0,
3022 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3023        0,
3024        0,
3025        0,
3026        0,
3027 /*119*/0,
3028        0,
3029        0,
3030        0,
3031        MatGetMultiProcBlock_MPIAIJ,
3032 /*124*/MatFindNonZeroRows_MPIAIJ,
3033        0,
3034        0,
3035        0,
3036        MatGetSubMatricesParallel_MPIAIJ
3037 };
3038 
3039 /* ----------------------------------------------------------------------------------------*/
3040 
3041 EXTERN_C_BEGIN
3042 #undef __FUNCT__
3043 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3044 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3045 {
3046   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3047   PetscErrorCode ierr;
3048 
3049   PetscFunctionBegin;
3050   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3051   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054 EXTERN_C_END
3055 
3056 EXTERN_C_BEGIN
3057 #undef __FUNCT__
3058 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3059 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3060 {
3061   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3062   PetscErrorCode ierr;
3063 
3064   PetscFunctionBegin;
3065   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3066   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3067   PetscFunctionReturn(0);
3068 }
3069 EXTERN_C_END
3070 
3071 EXTERN_C_BEGIN
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3074 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3075 {
3076   Mat_MPIAIJ     *b;
3077   PetscErrorCode ierr;
3078   PetscInt       i;
3079 
3080   PetscFunctionBegin;
3081   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3082   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3083   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3084   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3085 
3086   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3087   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3088   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3089   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3090   if (d_nnz) {
3091     for (i=0; i<B->rmap->n; i++) {
3092       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3093     }
3094   }
3095   if (o_nnz) {
3096     for (i=0; i<B->rmap->n; i++) {
3097       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3098     }
3099   }
3100   b = (Mat_MPIAIJ*)B->data;
3101 
3102   if (!B->preallocated) {
3103     /* Explicitly create 2 MATSEQAIJ matrices. */
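    /* b->A holds the "diagonal" block (local rows x local columns); b->B holds
       the "off-diagonal" block, created here with the full global column width
       and compacted to just the nonzero columns later, during assembly */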
3104     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3105     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3106     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3107     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3108     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3109     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3110     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3111     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3112   }
3113 
3114   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3115   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3116   B->preallocated = PETSC_TRUE;
3117   PetscFunctionReturn(0);
3118 }
3119 EXTERN_C_END
3120 
3121 #undef __FUNCT__
3122 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3123 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3124 {
3125   Mat            mat;
3126   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3127   PetscErrorCode ierr;
3128 
3129   PetscFunctionBegin;
3130   *newmat       = 0;
3131   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3132   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3133   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3134   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3135   a    = (Mat_MPIAIJ*)mat->data;
3136 
3137   mat->factortype    = matin->factortype;
3138   mat->rmap->bs      = matin->rmap->bs;
3139   mat->assembled    = PETSC_TRUE;
3140   mat->insertmode   = NOT_SET_VALUES;
3141   mat->preallocated = PETSC_TRUE;
3142 
3143   a->size           = oldmat->size;
3144   a->rank           = oldmat->rank;
3145   a->donotstash     = oldmat->donotstash;
3146   a->roworiented    = oldmat->roworiented;
3147   a->rowindices     = 0;
3148   a->rowvalues      = 0;
3149   a->getrowactive   = PETSC_FALSE;
3150 
3151   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3152   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3153 
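  /* colmap maps global column numbers to local columns of the off-diagonal
     block B, and garray is the matching local-to-global list; copy both so
     the duplicate can insert off-diagonal entries without rebuilding them */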
3154   if (oldmat->colmap) {
3155 #if defined (PETSC_USE_CTABLE)
3156     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3157 #else
3158     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3159     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3160     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3161 #endif
3162   } else a->colmap = 0;
3163   if (oldmat->garray) {
3164     PetscInt len;
3165     len  = oldmat->B->cmap->n;
3166     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3167     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3168     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3169   } else a->garray = 0;
3170 
3171   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3172   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3173   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3174   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3175   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3176   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3177   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3178   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3179   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3180   *newmat = mat;
3181   PetscFunctionReturn(0);
3182 }
3183 
3184 /*
3185     Allows sending/receiving messages larger than 2 gigabytes in a single call
3186 */
3187 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3188 {
3189   int             ierr;
3190   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3191   PetscInt        i,numchunks;
3192   PetscMPIInt     icnt;
3193 
3194   numchunks = cnt/CHUNKSIZE + 1;
3195   for (i=0; i<numchunks; i++) {
3196     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3197     ierr = MPI_Send(mess,icnt,type,to,tag,comm);CHKERRQ(ierr);
3198     if (type == MPIU_INT) {
3199       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3200     } else if (type == MPIU_SCALAR) {
3201       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3202     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3203   }
3204   return 0;
3205 }
3206 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3207 {
3208   int             ierr;
3209   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3210   MPI_Status      status;
3211   PetscInt        i,numchunks;
3212   PetscMPIInt     icnt;
3213 
3214   numchunks = cnt/CHUNKSIZE + 1;
3215   for (i=0; i<numchunks; i++) {
3216     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3217     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);CHKERRQ(ierr);
3218     if (type == MPIU_INT) {
3219       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3220     } else if (type == MPIU_SCALAR) {
3221       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3222     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3223   }
3224   return 0;
3225 }
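/*
   A worked example of the chunking above (illustration only): with
   CHUNKSIZE = 250,000,000 and a message of cnt = 600,000,000 entries,
   numchunks = 600000000/250000000 + 1 = 3, so the pieces exchanged are
   250,000,000, 250,000,000 and 100,000,000 entries; each individual
   MPI_Send()/MPI_Recv() count therefore fits safely in a PetscMPIInt.
*/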
3226 
3227 #undef __FUNCT__
3228 #define __FUNCT__ "MatLoad_MPIAIJ"
3229 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3230 {
3231   PetscScalar    *vals,*svals;
3232   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3233   PetscErrorCode ierr;
3234   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3235   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3236   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3237   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3238   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3239   int            fd;
3240 
3241   PetscFunctionBegin;
3242   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3243   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
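  /* process 0 reads the binary header, presumably the standard PETSc layout
     [MAT_FILE_CLASSID, M, N, total nonzeros]; M and N are used below while
     the total nonzero count in header[3] is not needed by this routine */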
3244   if (!rank) {
3245     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3246     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3247     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3248   }
3249 
3250   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3251 
3252   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3253   M = header[1]; N = header[2];
3254   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3255   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3256   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3257 
3258   /* If global sizes are set, check if they are consistent with that given in the file */
3259   if (sizesset) {
3260     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3261   }
3262   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3263   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3264 
3265   /* determine ownership of all rows */
3266   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3267   else m = newMat->rmap->n; /* Set by user */
3268 
3269   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3270   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3271 
3272   /* First process needs enough room for process with most rows */
3273   if (!rank) {
3274     mmax       = rowners[1];
3275     for (i=2; i<=size; i++) {
3276       mmax = PetscMax(mmax,rowners[i]);
3277     }
3278   } else mmax = m;
3279 
3280   rowners[0] = 0;
3281   for (i=2; i<=size; i++) {
3282     rowners[i] += rowners[i-1];
3283   }
3284   rstart = rowners[rank];
3285   rend   = rowners[rank+1];
3286 
3287   /* distribute row lengths to all processors */
3288   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3289   if (!rank) {
3290     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3291     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3292     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3293     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3294     for (j=0; j<m; j++) {
3295       procsnz[0] += ourlens[j];
3296     }
3297     for (i=1; i<size; i++) {
3298       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3299       /* calculate the number of nonzeros on each processor */
3300       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3301         procsnz[i] += rowlengths[j];
3302       }
3303       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3304     }
3305     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3306   } else {
3307     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3308   }
3309 
3310   if (!rank) {
3311     /* determine max buffer needed and allocate it */
3312     maxnz = 0;
3313     for (i=0; i<size; i++) {
3314       maxnz = PetscMax(maxnz,procsnz[i]);
3315     }
3316     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3317 
3318     /* read in my part of the matrix column indices  */
3319     nz   = procsnz[0];
3320     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3321     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3322 
3323     /* read in everyone else's rows and ship them off */
3324     for (i=1; i<size; i++) {
3325       nz     = procsnz[i];
3326       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3327       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3328     }
3329     ierr = PetscFree(cols);CHKERRQ(ierr);
3330   } else {
3331     /* determine buffer space needed for message */
3332     nz = 0;
3333     for (i=0; i<m; i++) {
3334       nz += ourlens[i];
3335     }
3336     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3337 
3338     /* receive message of column indices */
3339     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3340   }
3341 
3342   /* determine column ownership if matrix is not square */
3343   if (N != M) {
3344     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3345     else n = newMat->cmap->n;
3346     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3347     cstart = cend - n;
3348   } else {
3349     cstart = rstart;
3350     cend   = rend;
3351     n      = cend - cstart;
3352   }
3353 
3354   /* loop over local rows, determining number of off diagonal entries */
3355   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3356   jj = 0;
3357   for (i=0; i<m; i++) {
3358     for (j=0; j<ourlens[i]; j++) {
3359       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3360       jj++;
3361     }
3362   }
3363 
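  /* temporarily turn ourlens[] into diagonal-block row lengths for the
     preallocation call below; the totals are restored right afterwards */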
3364   for (i=0; i<m; i++) {
3365     ourlens[i] -= offlens[i];
3366   }
3367   if (!sizesset) {
3368     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3369   }
3370   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3371 
3372   for (i=0; i<m; i++) {
3373     ourlens[i] += offlens[i];
3374   }
3375 
3376   if (!rank) {
3377     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3378 
3379     /* read in my part of the matrix numerical values  */
3380     nz   = procsnz[0];
3381     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3382 
3383     /* insert into matrix */
3384     jj      = rstart;
3385     smycols = mycols;
3386     svals   = vals;
3387     for (i=0; i<m; i++) {
3388       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3389       smycols += ourlens[i];
3390       svals   += ourlens[i];
3391       jj++;
3392     }
3393 
3394     /* read in other processors and ship out */
3395     for (i=1; i<size; i++) {
3396       nz     = procsnz[i];
3397       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3398       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3399     }
3400     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3401   } else {
3402     /* receive numeric values */
3403     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3404 
3405     /* receive message of values */
3406     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3407 
3408     /* insert into matrix */
3409     jj      = rstart;
3410     smycols = mycols;
3411     svals   = vals;
3412     for (i=0; i<m; i++) {
3413       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3414       smycols += ourlens[i];
3415       svals   += ourlens[i];
3416       jj++;
3417     }
3418   }
3419   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3420   ierr = PetscFree(vals);CHKERRQ(ierr);
3421   ierr = PetscFree(mycols);CHKERRQ(ierr);
3422   ierr = PetscFree(rowners);CHKERRQ(ierr);
3423 
3424   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3425   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3426   PetscFunctionReturn(0);
3427 }
3428 
3429 #undef __FUNCT__
3430 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3431 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3432 {
3433   PetscErrorCode ierr;
3434   IS             iscol_local;
3435   PetscInt       csize;
3436 
3437   PetscFunctionBegin;
3438   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3439   if (call == MAT_REUSE_MATRIX) {
3440     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3441     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3442   } else {
3443     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3444   }
3445   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3446   if (call == MAT_INITIAL_MATRIX) {
3447     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3448     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3449   }
3450   PetscFunctionReturn(0);
3451 }
3452 
3453 #undef __FUNCT__
3454 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3455 /*
3456     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
3457   on each process, and then the end result formed by concatenating those
3458   local matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3459 
3460   Note: This requires a sequential iscol with all indices.
3461 */
3462 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3463 {
3464   PetscErrorCode ierr;
3465   PetscMPIInt    rank,size;
3466   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3467   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3468   Mat            *local,M,Mreuse;
3469   MatScalar      *vwork,*aa;
3470   MPI_Comm       comm = ((PetscObject)mat)->comm;
3471   Mat_SeqAIJ     *aij;
3472 
3473 
3474   PetscFunctionBegin;
3475   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3476   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3477 
3478   if (call ==  MAT_REUSE_MATRIX) {
3479     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3480     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3481     local = &Mreuse;
3482     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3483   } else {
3484     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3485     Mreuse = *local;
3486     ierr   = PetscFree(local);CHKERRQ(ierr);
3487   }
3488 
3489   /*
3490       m - number of local rows
3491       n - number of columns (same on all processors)
3492       rstart - first row in new global matrix generated
3493   */
3494   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3495   if (call == MAT_INITIAL_MATRIX) {
3496     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3497     ii  = aij->i;
3498     jj  = aij->j;
3499 
3500     /*
3501         Determine the number of non-zeros in the diagonal and off-diagonal
3502         portions of the matrix in order to do correct preallocation
3503     */
3504 
3505     /* first get start and end of "diagonal" columns */
3506     if (csize == PETSC_DECIDE) {
3507       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3508       if (mglobal == n) { /* square matrix */
3509         nlocal = m;
3510       } else {
3511         nlocal = n/size + ((n % size) > rank);
3512       }
3513     } else {
3514       nlocal = csize;
3515     }
3516     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3517     rstart = rend - nlocal;
3518     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3519 
3520     /* next, compute all the lengths */
3521     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3522     olens = dlens + m;
3523     for (i=0; i<m; i++) {
3524       jend = ii[i+1] - ii[i];
3525       olen = 0;
3526       dlen = 0;
3527       for (j=0; j<jend; j++) {
3528         if (*jj < rstart || *jj >= rend) olen++;
3529         else dlen++;
3530         jj++;
3531       }
3532       olens[i] = olen;
3533       dlens[i] = dlen;
3534     }
3535     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3536     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3537     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3538     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3539     ierr = PetscFree(dlens);CHKERRQ(ierr);
3540   } else {
3541     PetscInt ml,nl;
3542 
3543     M = *newmat;
3544     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3545     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3546     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3547     /*
3548          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3549        rather than the slower MatSetValues().
3550     */
3551     M->was_assembled = PETSC_TRUE;
3552     M->assembled     = PETSC_FALSE;
3553   }
3554   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3555   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3556   ii  = aij->i;
3557   jj  = aij->j;
3558   aa  = aij->a;
3559   for (i=0; i<m; i++) {
3560     row   = rstart + i;
3561     nz    = ii[i+1] - ii[i];
3562     cwork = jj;     jj += nz;
3563     vwork = aa;     aa += nz;
3564     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3565   }
3566 
3567   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3568   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3569   *newmat = M;
3570 
3571   /* save submatrix used in processor for next request */
3572   if (call ==  MAT_INITIAL_MATRIX) {
3573     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3574     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3575   }
3576 
3577   PetscFunctionReturn(0);
3578 }
3579 
3580 EXTERN_C_BEGIN
3581 #undef __FUNCT__
3582 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3583 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3584 {
3585   PetscInt       m,cstart, cend,j,nnz,i,d;
3586   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3587   const PetscInt *JJ;
3588   PetscScalar    *values;
3589   PetscErrorCode ierr;
3590 
3591   PetscFunctionBegin;
3592   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3593 
3594   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3595   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3596   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3597   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3598   m      = B->rmap->n;
3599   cstart = B->cmap->rstart;
3600   cend   = B->cmap->rend;
3601   rstart = B->rmap->rstart;
3602 
3603   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3604 
3605 #if defined(PETSC_USE_DEBUG)
3606   for (i=0; i<m; i++) {
3607     nnz     = Ii[i+1]- Ii[i];
3608     JJ      = J + Ii[i];
3609     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3610     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3611     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3612   }
3613 #endif
3614 
3615   for (i=0; i<m; i++) {
3616     nnz     = Ii[i+1]- Ii[i];
3617     JJ      = J + Ii[i];
3618     nnz_max = PetscMax(nnz_max,nnz);
3619     d       = 0;
3620     for (j=0; j<nnz; j++) {
3621       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3622     }
3623     d_nnz[i] = d;
3624     o_nnz[i] = nnz - d;
3625   }
3626   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3627   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3628 
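  /* if no numerical values were supplied, insert explicit zeros below so the
     nonzero pattern is still laid down by the MatSetValues_MPIAIJ() loop */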
3629   if (v) values = (PetscScalar*)v;
3630   else {
3631     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3632     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3633   }
3634 
3635   for (i=0; i<m; i++) {
3636     ii   = i + rstart;
3637     nnz  = Ii[i+1]- Ii[i];
3638     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3639   }
3640   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3641   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3642 
3643   if (!v) {
3644     ierr = PetscFree(values);CHKERRQ(ierr);
3645   }
3646   PetscFunctionReturn(0);
3647 }
3648 EXTERN_C_END
3649 
3650 #undef __FUNCT__
3651 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3652 /*@
3653    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3654    (the default parallel PETSc format).
3655 
3656    Collective on MPI_Comm
3657 
3658    Input Parameters:
3659 +  B - the matrix
3660 .  i - the indices into j for the start of each local row (starts with zero)
3661 .  j - the column indices for each local row (starts with zero)
3662 -  v - optional values in the matrix
3663 
3664    Level: developer
3665 
3666    Notes:
3667        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3668      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3669      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3670 
3671        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3672 
3673        The format used for the sparse matrix input is equivalent to a
3674     row-major ordering, i.e., for the following matrix the expected input
3675     data is as shown:
3676 
3677         1 0 0
3678         2 0 3     P0
3679        -------
3680         4 5 6     P1
3681 
3682      Process0 [P0]: rows_owned=[0,1]
3683         i =  {0,1,3}  [size = nrow+1  = 2+1]
3684         j =  {0,0,2}  [size = local nz = 3]
3685         v =  {1,2,3}  [size = local nz = 3]
3686 
3687      Process1 [P1]: rows_owned=[2]
3688         i =  {0,3}    [size = nrow+1  = 1+1]
3689         j =  {0,1,2}  [size = local nz = 3]
3690         v =  {4,5,6}  [size = local nz = 3]
3691 
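     A minimal call sequence (a sketch only; error checking omitted; m,n are
     the local sizes and i,j,v the per-process arrays described above) could be:

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve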
3692 .keywords: matrix, aij, compressed row, sparse, parallel
3693 
3694 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3695           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3696 @*/
3697 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3698 {
3699   PetscErrorCode ierr;
3700 
3701   PetscFunctionBegin;
3702   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3703   PetscFunctionReturn(0);
3704 }
3705 
3706 #undef __FUNCT__
3707 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3708 /*@C
3709    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3710    (the default parallel PETSc format).  For good matrix assembly performance
3711    the user should preallocate the matrix storage by setting the parameters
3712    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3713    performance can be increased by more than a factor of 50.
3714 
3715    Collective on MPI_Comm
3716 
3717    Input Parameters:
3718 +  A - the matrix
3719 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3720            (same value is used for all local rows)
3721 .  d_nnz - array containing the number of nonzeros in the various rows of the
3722            DIAGONAL portion of the local submatrix (possibly different for each row)
3723            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3724            The size of this array is equal to the number of local rows, i.e 'm'.
3725            You must leave room for the diagonal entry even if it is zero.
3726 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3727            submatrix (same value is used for all local rows).
3728 -  o_nnz - array containing the number of nonzeros in the various rows of the
3729            OFF-DIAGONAL portion of the local submatrix (possibly different for
3730            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3731            structure. The size of this array is equal to the number
3732            of local rows, i.e 'm'.
3733 
3734    If the *_nnz parameter is given then the *_nz parameter is ignored
3735 
3736    The AIJ format (also called the Yale sparse matrix format or
3737    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3738    storage.  The stored row and column indices begin with zero.
3739    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3740 
3741    The parallel matrix is partitioned such that the first m0 rows belong to
3742    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3743    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3744 
3745    The DIAGONAL portion of the local submatrix of a processor can be defined
3746    as the submatrix which is obtained by extracting the part corresponding to
3747    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3748    first row that belongs to the processor, r2 is the last row belonging to
3749    this processor, and c1-c2 is the range of indices of the local part of a
3750    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3751    common case of a square matrix, the row and column ranges are the same and
3752    the DIAGONAL part is also square. The remaining portion of the local
3753    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3754 
3755    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3756 
3757    You can call MatGetInfo() to get information on how effective the preallocation was;
3758    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3759    You can also run with the option -info and look for messages with the string
3760    malloc in them to see if additional memory allocation was needed.
3761 
3762    Example usage:
3763 
3764    Consider the following 8x8 matrix with 34 non-zero values, that is
3765    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3766    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3767    as follows:
3768 
3769 .vb
3770             1  2  0  |  0  3  0  |  0  4
3771     Proc0   0  5  6  |  7  0  0  |  8  0
3772             9  0 10  | 11  0  0  | 12  0
3773     -------------------------------------
3774            13  0 14  | 15 16 17  |  0  0
3775     Proc1   0 18  0  | 19 20 21  |  0  0
3776             0  0  0  | 22 23  0  | 24  0
3777     -------------------------------------
3778     Proc2  25 26 27  |  0  0 28  | 29  0
3779            30  0  0  | 31 32 33  |  0 34
3780 .ve
3781 
3782    This can be represented as a collection of submatrices as:
3783 
3784 .vb
3785       A B C
3786       D E F
3787       G H I
3788 .ve
3789 
3790    Where the submatrices A,B,C are owned by proc0, D,E,F are
3791    owned by proc1, G,H,I are owned by proc2.
3792 
3793    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3794    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3795    The 'M','N' parameters are 8,8, and have the same values on all procs.
3796 
3797    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3798    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3799    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3800    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3801    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3802    matrix, and [DF] as another SeqAIJ matrix.
3803 
3804    When d_nz, o_nz parameters are specified, d_nz storage elements are
3805    allocated for every row of the local diagonal submatrix, and o_nz
3806    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3807    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3808    per local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3809    In this case, the values of d_nz,o_nz are:
3810 .vb
3811      proc0 : dnz = 2, o_nz = 2
3812      proc1 : dnz = 3, o_nz = 2
3813      proc2 : dnz = 1, o_nz = 4
3814 .ve
3815    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3816    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3817    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3818    34 values.
3819 
3820    When d_nnz, o_nnz parameters are specified, the storage is specified
3821    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3822    In the above case the values for d_nnz,o_nnz are:
3823 .vb
3824      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3825      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3826      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3827 .ve
3828    Here the space allocated is the sum of all the above values, i.e., 34, and
3829    hence pre-allocation is perfect.
3830 
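   For instance, proc0 of the example above could be set up with the following
   sketch (error checking omitted; 'comm' spans the three processes):

.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve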
3831    Level: intermediate
3832 
3833 .keywords: matrix, aij, compressed row, sparse, parallel
3834 
3835 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3836           MPIAIJ, MatGetInfo()
3837 @*/
3838 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3839 {
3840   PetscErrorCode ierr;
3841 
3842   PetscFunctionBegin;
3843   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3844   PetscFunctionReturn(0);
3845 }
3846 
3847 #undef __FUNCT__
3848 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3849 /*@
3850      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3851          CSR format the local rows.
3852 
3853    Collective on MPI_Comm
3854 
3855    Input Parameters:
3856 +  comm - MPI communicator
3857 .  m - number of local rows (Cannot be PETSC_DECIDE)
3858 .  n - This value should be the same as the local size used in creating the
3859        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3860        calculated if N is given) For square matrices n is almost always m.
3861 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3862 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3863 .   i - row indices
3864 .   j - column indices
3865 -   a - matrix values
3866 
3867    Output Parameter:
3868 .   mat - the matrix
3869 
3870    Level: intermediate
3871 
3872    Notes:
3873        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3874      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3875      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3876 
3877        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3878 
3879        The format used for the sparse matrix input is equivalent to a
3880     row-major ordering, i.e., for the following matrix the expected input
3881     data is as shown:
3882 
3883         1 0 0
3884         2 0 3     P0
3885        -------
3886         4 5 6     P1
3887 
3888      Process0 [P0]: rows_owned=[0,1]
3889         i =  {0,1,3}  [size = nrow+1  = 2+1]
3890         j =  {0,0,2}  [size = local nz = 3]
3891         v =  {1,2,3}  [size = local nz = 3]
3892 
3893      Process1 [P1]: rows_owned=[2]
3894         i =  {0,3}    [size = nrow+1  = 1+1]
3895         j =  {0,1,2}  [size = local nz = 3]
3896         v =  {4,5,6}  [size = local nz = 3]
3897 
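     For example, P0 above could make the call below, with P1 making the
     matching call for its own row (a sketch only; error checking omitted):

.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar a[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);
.ve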
3898 .keywords: matrix, aij, compressed row, sparse, parallel
3899 
3900 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3901           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3902 @*/
3903 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3904 {
3905   PetscErrorCode ierr;
3906 
3907   PetscFunctionBegin;
3908   if (i[0]) {
3909     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3910   }
3911   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3912   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3913   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3914   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3915   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3916   PetscFunctionReturn(0);
3917 }
3918 
3919 #undef __FUNCT__
3920 #define __FUNCT__ "MatCreateMPIAIJ"
3921 /*@C
3922    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3923    (the default parallel PETSc format).  For good matrix assembly performance
3924    the user should preallocate the matrix storage by setting the parameters
3925    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3926    performance can be increased by more than a factor of 50.
3927 
3928    Collective on MPI_Comm
3929 
3930    Input Parameters:
3931 +  comm - MPI communicator
3932 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3933            This value should be the same as the local size used in creating the
3934            y vector for the matrix-vector product y = Ax.
3935 .  n - This value should be the same as the local size used in creating the
3936        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3937        calculated if N is given) For square matrices n is almost always m.
3938 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3939 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3940 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3941            (same value is used for all local rows)
3942 .  d_nnz - array containing the number of nonzeros in the various rows of the
3943            DIAGONAL portion of the local submatrix (possibly different for each row)
3944            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3945            The size of this array is equal to the number of local rows, i.e 'm'.
3946            You must leave room for the diagonal entry even if it is zero.
3947 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3948            submatrix (same value is used for all local rows).
3949 -  o_nnz - array containing the number of nonzeros in the various rows of the
3950            OFF-DIAGONAL portion of the local submatrix (possibly different for
3951            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3952            structure. The size of this array is equal to the number
3953            of local rows, i.e 'm'.
3954 
3955    Output Parameter:
3956 .  A - the matrix
3957 
3958    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3959    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3960    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3961 
3962    Notes:
3963    If the *_nnz parameter is given then the *_nz parameter is ignored
3964 
3965    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3966    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3967    storage requirements for this matrix.
3968 
3969    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3970    processor then it must be used on all processors that share the object for
3971    that argument.
3972 
3973    The user MUST specify either the local or global matrix dimensions
3974    (possibly both).
3975 
3976    The parallel matrix is partitioned across processors such that the
3977    first m0 rows belong to process 0, the next m1 rows belong to
3978    process 1, the next m2 rows belong to process 2 etc.. where
3979    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3980    values corresponding to [m x N] submatrix.
3981 
3982    The columns are logically partitioned with the n0 columns belonging
3983    to the 0th partition, the next n1 columns belonging to the next
3984    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3985 
3986    The DIAGONAL portion of the local submatrix on any given processor
3987    is the submatrix corresponding to the rows and columns m,n owned by
3988    that processor, i.e., the diagonal submatrix on process 0 is [m0 x n0],
3989    the diagonal submatrix on process 1 is [m1 x n1],
3990    etc. The remaining portion of the local submatrix [m x (N-n)]
3991    constitutes the OFF-DIAGONAL portion. The example below better
3992    illustrates this concept.
3993 
3994    For a square global matrix we define each processor's diagonal portion
3995    to be its local rows and the corresponding columns (a square submatrix);
3996    each processor's off-diagonal portion encompasses the remainder of the
3997    local matrix (a rectangular submatrix).
3998 
3999    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4000 
4001    When calling this routine with a single process communicator, a matrix of
4002    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4003    type of communicator, use the construction mechanism:
4004      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4005 
4006    By default, this format uses inodes (identical nodes) when possible.
4007    We search for consecutive rows with the same nonzero structure, thereby
4008    reusing matrix information to achieve increased efficiency.
4009 
4010    Options Database Keys:
4011 +  -mat_no_inode  - Do not use inodes
4012 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4013 -  -mat_aij_oneindex - Internally use indexing starting at 1
4014         rather than 0.  Note that when calling MatSetValues(),
4015         the user still MUST index entries starting at 0!
4016 
4017 
4018    Example usage:
4019 
4020    Consider the following 8x8 matrix with 34 non-zero values that is
4021    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4022    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4023    as follows:
4024 
4025 .vb
4026             1  2  0  |  0  3  0  |  0  4
4027     Proc0   0  5  6  |  7  0  0  |  8  0
4028             9  0 10  | 11  0  0  | 12  0
4029     -------------------------------------
4030            13  0 14  | 15 16 17  |  0  0
4031     Proc1   0 18  0  | 19 20 21  |  0  0
4032             0  0  0  | 22 23  0  | 24  0
4033     -------------------------------------
4034     Proc2  25 26 27  |  0  0 28  | 29  0
4035            30  0  0  | 31 32 33  |  0 34
4036 .ve
4037 
4038    This can be represented as a collection of submatrices as:
4039 
4040 .vb
4041       A B C
4042       D E F
4043       G H I
4044 .ve
4045 
4046    where the submatrices A,B,C are owned by proc0, D,E,F are
4047    owned by proc1, G,H,I are owned by proc2.
4048 
4049    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4050    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4051    The 'M','N' parameters are 8,8, and have the same values on all procs.
4052 
4053    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4054    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4055    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4056    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4057    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4058    matrix, and [DF] as another SeqAIJ matrix.
4059 
4060    When d_nz, o_nz parameters are specified, d_nz storage elements are
4061    allocated for every row of the local diagonal submatrix, and o_nz
4062    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4063    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4064    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4065    In this case, the values of d_nz,o_nz are:
4066 .vb
4067      proc0 : d_nz = 2, o_nz = 2
4068      proc1 : d_nz = 3, o_nz = 2
4069      proc2 : d_nz = 1, o_nz = 4
4070 .ve
4071    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4072    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4073    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4074    34 values.
4075 
4076    When the d_nnz, o_nnz parameters are specified, the storage is specified
4077    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4078    In the above case the values for d_nnz,o_nnz are:
4079 .vb
4080      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4081      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4082      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4083 .ve
4084    Here the space allocated is the sum of all the above values, i.e., 34, and
4085    hence the preallocation is perfect.
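
   For instance, a minimal sketch of this preallocation on proc1 (hypothetical
   variable names; error checking omitted) might be:
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};  /* proc1 values from the table above */
     MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve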
4086 
4087    Level: intermediate
4088 
4089 .keywords: matrix, aij, compressed row, sparse, parallel
4090 
4091 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4092           MPIAIJ, MatCreateMPIAIJWithArrays()
4093 @*/
4094 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4095 {
4096   PetscErrorCode ierr;
4097   PetscMPIInt    size;
4098 
4099   PetscFunctionBegin;
4100   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4101   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4102   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4103   if (size > 1) {
4104     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4105     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4106   } else {
4107     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4108     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4109   }
4110   PetscFunctionReturn(0);
4111 }
4112 
4113 #undef __FUNCT__
4114 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4115 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4116 {
4117   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4118 
4119   PetscFunctionBegin;
4120   *Ad     = a->A;
4121   *Ao     = a->B;
4122   *colmap = a->garray;
4123   PetscFunctionReturn(0);
4124 }
4125 
4126 #undef __FUNCT__
4127 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4128 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4129 {
4130   PetscErrorCode ierr;
4131   PetscInt       i;
4132   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4133 
4134   PetscFunctionBegin;
4135   if (coloring->ctype == IS_COLORING_GLOBAL) {
4136     ISColoringValue *allcolors,*colors;
4137     ISColoring      ocoloring;
4138 
4139     /* set coloring for diagonal portion */
4140     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4141 
4142     /* set coloring for off-diagonal portion */
4143     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4144     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4145     for (i=0; i<a->B->cmap->n; i++) {
4146       colors[i] = allcolors[a->garray[i]];
4147     }
4148     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4149     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4150     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4151     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4152   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4153     ISColoringValue *colors;
4154     PetscInt        *larray;
4155     ISColoring      ocoloring;
4156 
4157     /* set coloring for diagonal portion */
4158     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4159     for (i=0; i<a->A->cmap->n; i++) {
4160       larray[i] = i + A->cmap->rstart;
4161     }
4162     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4163     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4164     for (i=0; i<a->A->cmap->n; i++) {
4165       colors[i] = coloring->colors[larray[i]];
4166     }
4167     ierr = PetscFree(larray);CHKERRQ(ierr);
4168     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4169     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4170     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4171 
4172     /* set coloring for off-diagonal portion */
4173     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4174     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4175     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4176     for (i=0; i<a->B->cmap->n; i++) {
4177       colors[i] = coloring->colors[larray[i]];
4178     }
4179     ierr = PetscFree(larray);CHKERRQ(ierr);
4180     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4181     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4182     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4183   } else {
4184     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4185   }
4186 
4187   PetscFunctionReturn(0);
4188 }
4189 
4190 #if defined(PETSC_HAVE_ADIC)
4191 #undef __FUNCT__
4192 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4193 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4194 {
4195   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4196   PetscErrorCode ierr;
4197 
4198   PetscFunctionBegin;
4199   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4200   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4201   PetscFunctionReturn(0);
4202 }
4203 #endif
4204 
4205 #undef __FUNCT__
4206 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4207 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4208 {
4209   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4210   PetscErrorCode ierr;
4211 
4212   PetscFunctionBegin;
4213   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4214   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4215   PetscFunctionReturn(0);
4216 }
4217 
4218 #undef __FUNCT__
4219 #define __FUNCT__ "MatMerge"
4220 /*@
4221       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4222                  matrices from each processor
4223 
4224     Collective on MPI_Comm
4225 
4226    Input Parameters:
4227 +    comm - the communicator the parallel matrix will live on
4228 .    inmat - the input sequential matrix on each process
4229 .    n - number of local columns (or PETSC_DECIDE)
4230 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4231 
4232    Output Parameter:
4233 .    outmat - the parallel matrix generated
4234 
4235     Level: advanced
4236 
4237    Notes: The number of columns of the matrix on EACH processor MUST be the same. The input matrix inmat is destroyed by this routine once its values have been copied into outmat.
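
   A minimal usage sketch (seqA is a hypothetical, already assembled sequential
   matrix on each process; error checking omitted):
.vb
     Mat merged;
     MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);
.ve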
4238 
4239 @*/
4240 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4241 {
4242   PetscErrorCode ierr;
4243   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4244   PetscInt       *indx;
4245   PetscScalar    *values;
4246 
4247   PetscFunctionBegin;
4248   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4249   if (scall == MAT_INITIAL_MATRIX){
4250     /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
4251     if (n == PETSC_DECIDE){
4252       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4253     }
4254     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4255     rstart -= m;
4256 
4257     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4258     for (i=0;i<m;i++) {
4259       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4260       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4261       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4262     }
4263     /* This routine will ONLY return MPIAIJ type matrix */
4264     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4265     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4266     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4267     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4268     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4269 
4270   } else if (scall == MAT_REUSE_MATRIX){
4271     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4272   } else {
4273     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4274   }
4275 
4276   for (i=0;i<m;i++) {
4277     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4278     Ii    = i + rstart;
4279     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4280     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4281   }
4282   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4283   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4284   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4285 
4286   PetscFunctionReturn(0);
4287 }
4288 
4289 #undef __FUNCT__
4290 #define __FUNCT__ "MatFileSplit"
4291 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4292 {
4293   PetscErrorCode    ierr;
4294   PetscMPIInt       rank;
4295   PetscInt          m,N,i,rstart,nnz;
4296   size_t            len;
4297   const PetscInt    *indx;
4298   PetscViewer       out;
4299   char              *name;
4300   Mat               B;
4301   const PetscScalar *values;
4302 
4303   PetscFunctionBegin;
4304   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4305   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4306   /* Should this be the type of the diagonal block of A? */
4307   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4308   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4309   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4310   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4311   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4312   for (i=0;i<m;i++) {
4313     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4314     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4315     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4316   }
4317   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4318   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4319 
4320   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4321   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4322   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4323   sprintf(name,"%s.%d",outfile,rank);
4324   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4325   ierr = PetscFree(name);
4326   ierr = MatView(B,out);CHKERRQ(ierr);
4327   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4328   ierr = MatDestroy(B);CHKERRQ(ierr);
4329   PetscFunctionReturn(0);
4330 }
4331 
4332 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4333 #undef __FUNCT__
4334 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4335 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4336 {
4337   PetscErrorCode       ierr;
4338   Mat_Merge_SeqsToMPI  *merge;
4339   PetscContainer       container;
4340 
4341   PetscFunctionBegin;
4342   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4343   if (container) {
4344     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4345     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4346     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4347     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4348     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4349     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4350     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4351     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4352     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4353     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4354     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4355     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4356     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4357     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4358 
4359     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4360     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4361     ierr = PetscFree(merge);CHKERRQ(ierr);
4362   }
4363 
4364   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4365   PetscFunctionReturn(0);
4366 }
4367 
4368 #include "../src/mat/utils/freespace.h"
4369 #include "petscbt.h"
4370 
4371 #undef __FUNCT__
4372 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4373 /*@C
4374       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4375                  matrices from each processor
4376 
4377     Collective on MPI_Comm
4378 
4379    Input Parameters:
4380 +    comm - the communicator the parallel matrix will live on
4381 .    seqmat - the input sequential matrix on each process
4382 .    m - number of local rows (or PETSC_DECIDE)
4383 .    n - number of local columns (or PETSC_DECIDE)
4384 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4385 
4386    Output Parameter:
4387 .    mpimat - the parallel matrix generated
4388 
4389     Level: advanced
4390 
4391    Notes:
4392      The dimensions of the sequential matrix in each processor MUST be the same.
4393      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4394      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
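
     A minimal usage sketch (seqA is a hypothetical sequential matrix of identical
     dimensions on each process; error checking omitted):
.vb
     Mat mpiA;
     MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);
.ve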
4395 @*/
4396 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4397 {
4398   PetscErrorCode       ierr;
4399   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4400   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4401   PetscMPIInt          size,rank,taga,*len_s;
4402   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4403   PetscInt             proc,m;
4404   PetscInt             **buf_ri,**buf_rj;
4405   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4406   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4407   MPI_Request          *s_waits,*r_waits;
4408   MPI_Status           *status;
4409   MatScalar            *aa=a->a;
4410   MatScalar            **abuf_r,*ba_i;
4411   Mat_Merge_SeqsToMPI  *merge;
4412   PetscContainer       container;
4413 
4414   PetscFunctionBegin;
4415   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4416 
4417   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4418   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4419 
4420   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4421   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat is missing the MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
4422   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4424   bi     = merge->bi;
4425   bj     = merge->bj;
4426   buf_ri = merge->buf_ri;
4427   buf_rj = merge->buf_rj;
4428 
4429   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4430   owners = merge->rowmap->range;
4431   len_s  = merge->len_s;
4432 
4433   /* send and recv matrix values */
4434   /*-----------------------------*/
4435   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4436   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4437 
4438   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4439   for (proc=0,k=0; proc<size; proc++){
4440     if (!len_s[proc]) continue;
4441     i = owners[proc];
4442     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4443     k++;
4444   }
4445 
4446   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4447   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4448   ierr = PetscFree(status);CHKERRQ(ierr);
4449 
4450   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4451   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4452 
4453   /* insert mat values of mpimat */
4454   /*----------------------------*/
4455   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4456   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4457 
4458   for (k=0; k<merge->nrecv; k++){
4459     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4460     nrows = *(buf_ri_k[k]);
4461     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4462     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4463   }
4464 
4465   /* set values of ba */
4466   m = merge->rowmap->n;
4467   for (i=0; i<m; i++) {
4468     arow = owners[rank] + i;
4469     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4470     bnzi = bi[i+1] - bi[i];
4471     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4472 
4473     /* add local non-zero vals of this proc's seqmat into ba */
4474     anzi = ai[arow+1] - ai[arow];
4475     aj   = a->j + ai[arow];
4476     aa   = a->a + ai[arow];
4477     nextaj = 0;
4478     for (j=0; nextaj<anzi; j++){
4479       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4480         ba_i[j] += aa[nextaj++];
4481       }
4482     }
4483 
4484     /* add received vals into ba */
4485     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4486       /* i-th row */
4487       if (i == *nextrow[k]) {
4488         anzi = *(nextai[k]+1) - *nextai[k];
4489         aj   = buf_rj[k] + *(nextai[k]);
4490         aa   = abuf_r[k] + *(nextai[k]);
4491         nextaj = 0;
4492         for (j=0; nextaj<anzi; j++){
4493           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4494             ba_i[j] += aa[nextaj++];
4495           }
4496         }
4497         nextrow[k]++; nextai[k]++;
4498       }
4499     }
4500     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4501   }
4502   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4503   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4504 
4505   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4506   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4507   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4508   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4509   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 #undef __FUNCT__
4514 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4515 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4516 {
4517   PetscErrorCode       ierr;
4518   Mat                  B_mpi;
4519   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4520   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4521   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4522   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4523   PetscInt             len,proc,*dnz,*onz;
4524   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4525   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4526   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4527   MPI_Status           *status;
4528   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4529   PetscBT              lnkbt;
4530   Mat_Merge_SeqsToMPI  *merge;
4531   PetscContainer       container;
4532 
4533   PetscFunctionBegin;
4534   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4535 
4536   /* make sure it is a PETSc comm */
4537   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4538   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4539   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4540 
4541   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4542   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4543 
4544   /* determine row ownership */
4545   /*---------------------------------------------------------*/
4546   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4547   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4548   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4549   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4550   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4551   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4552   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4553 
4554   m      = merge->rowmap->n;
4555   M      = merge->rowmap->N;
4556   owners = merge->rowmap->range;
4557 
4558   /* determine the number of messages to send, their lengths */
4559   /*---------------------------------------------------------*/
4560   len_s  = merge->len_s;
4561 
4562   len = 0;  /* length of buf_si[] */
4563   merge->nsend = 0;
4564   for (proc=0; proc<size; proc++){
4565     len_si[proc] = 0;
4566     if (proc == rank){
4567       len_s[proc] = 0;
4568     } else {
4569       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4570       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4571     }
4572     if (len_s[proc]) {
4573       merge->nsend++;
4574       nrows = 0;
4575       for (i=owners[proc]; i<owners[proc+1]; i++){
4576         if (ai[i+1] > ai[i]) nrows++;
4577       }
4578       len_si[proc] = 2*(nrows+1);
4579       len += len_si[proc];
4580     }
4581   }
4582 
4583   /* determine the number and length of messages to receive for ij-structure */
4584   /*-------------------------------------------------------------------------*/
4585   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4586   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4587 
4588   /* post the Irecv of j-structure */
4589   /*-------------------------------*/
4590   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4591   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4592 
4593   /* post the Isend of j-structure */
4594   /*--------------------------------*/
4595   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4596 
4597   for (proc=0, k=0; proc<size; proc++){
4598     if (!len_s[proc]) continue;
4599     i = owners[proc];
4600     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4601     k++;
4602   }
4603 
4604   /* receives and sends of j-structure are complete */
4605   /*------------------------------------------------*/
4606   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4607   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4608 
4609   /* send and recv i-structure */
4610   /*---------------------------*/
4611   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4612   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4613 
4614   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4615   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4616   for (proc=0,k=0; proc<size; proc++){
4617     if (!len_s[proc]) continue;
4618     /* form outgoing message for i-structure:
4619          buf_si[0]:                 nrows to be sent
4620                [1:nrows]:           row index (local to the destination process)
4621                [nrows+1:2*nrows+1]: i-structure index
4622     */
4623     /*-------------------------------------------*/
4624     nrows = len_si[proc]/2 - 1;
4625     buf_si_i    = buf_si + nrows+1;
4626     buf_si[0]   = nrows;
4627     buf_si_i[0] = 0;
4628     nrows = 0;
4629     for (i=owners[proc]; i<owners[proc+1]; i++){
4630       anzi = ai[i+1] - ai[i];
4631       if (anzi) {
4632         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4633         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4634         nrows++;
4635       }
4636     }
4637     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4638     k++;
4639     buf_si += len_si[proc];
4640   }
4641 
4642   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4643   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4644 
4645   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4646   for (i=0; i<merge->nrecv; i++){
4647     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4648   }
4649 
4650   ierr = PetscFree(len_si);CHKERRQ(ierr);
4651   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4652   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4653   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4654   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4655   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4656   ierr = PetscFree(status);CHKERRQ(ierr);
4657 
4658   /* compute a local seq matrix in each processor */
4659   /*----------------------------------------------*/
4660   /* allocate bi array and free space for accumulating nonzero column info */
4661   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4662   bi[0] = 0;
4663 
4664   /* create and initialize a linked list */
4665   nlnk = N+1;
4666   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4667 
4668   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4669   len  = ai[owners[rank+1]] - ai[owners[rank]]; /* num of local nonzeros of seqmat */
4671   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4672   current_space = free_space;
4673 
4674   /* determine symbolic info for each local row */
4675   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4676 
4677   for (k=0; k<merge->nrecv; k++){
4678     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4679     nrows = *buf_ri_k[k];
4680     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4681     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4682   }
4683 
4684   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4685   len = 0;
4686   for (i=0;i<m;i++) {
4687     bnzi   = 0;
4688     /* add local non-zero cols of this proc's seqmat into lnk */
4689     arow   = owners[rank] + i;
4690     anzi   = ai[arow+1] - ai[arow];
4691     aj     = a->j + ai[arow];
4692     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4693     bnzi += nlnk;
4694     /* add received col data into lnk */
4695     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4696       if (i == *nextrow[k]) { /* i-th row */
4697         anzi = *(nextai[k]+1) - *nextai[k];
4698         aj   = buf_rj[k] + *nextai[k];
4699         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4700         bnzi += nlnk;
4701         nextrow[k]++; nextai[k]++;
4702       }
4703     }
4704     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4705 
4706     /* if free space is not available, make more free space */
4707     if (current_space->local_remaining<bnzi) {
4708       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4709       nspacedouble++;
4710     }
4711     /* copy data into free space, then initialize lnk */
4712     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4713     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4714 
4715     current_space->array           += bnzi;
4716     current_space->local_used      += bnzi;
4717     current_space->local_remaining -= bnzi;
4718 
4719     bi[i+1] = bi[i] + bnzi;
4720   }
4721 
4722   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4723 
4724   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4725   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4726   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4727 
4728   /* create symbolic parallel matrix B_mpi */
4729   /*---------------------------------------*/
4730   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4731   if (n==PETSC_DECIDE) {
4732     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4733   } else {
4734     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4735   }
4736   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4737   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4738   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4739 
4740   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4741   B_mpi->assembled     = PETSC_FALSE;
4742   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4743   merge->bi            = bi;
4744   merge->bj            = bj;
4745   merge->buf_ri        = buf_ri;
4746   merge->buf_rj        = buf_rj;
4747   merge->coi           = PETSC_NULL;
4748   merge->coj           = PETSC_NULL;
4749   merge->owners_co     = PETSC_NULL;
4750 
4751   /* attach the supporting struct to B_mpi for reuse */
4752   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4753   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4754   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4755   *mpimat = B_mpi;
4756 
4757   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4758   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4759   PetscFunctionReturn(0);
4760 }
4761 
4762 #undef __FUNCT__
4763 #define __FUNCT__ "MatMerge_SeqsToMPI"
4764 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4765 {
4766   PetscErrorCode   ierr;
4767 
4768   PetscFunctionBegin;
4769   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4770   if (scall == MAT_INITIAL_MATRIX){
4771     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4772   }
4773   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4774   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4775   PetscFunctionReturn(0);
4776 }
4777 
4778 #undef __FUNCT__
4779 #define __FUNCT__ "MatGetLocalMat"
4780 /*@
4781      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of A
4782 
4783     Not Collective
4784 
4785    Input Parameters:
4786 +    A - the matrix
4787 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4788 
4789    Output Parameter:
4790 .    A_loc - the local sequential matrix generated
4791 
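   A minimal usage sketch (A is a hypothetical assembled MPIAIJ matrix; error
   checking omitted):
.vb
     Mat Aloc;
     MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);  /* gather this process's rows */
     /* ... after the numerical values of A change ... */
     MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);    /* refresh the values in Aloc */
.ve
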
4792     Level: developer
4793 
4794 @*/
4795 PetscErrorCode  MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4796 {
4797   PetscErrorCode  ierr;
4798   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4799   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4800   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4801   MatScalar       *aa=a->a,*ba=b->a,*cam;
4802   PetscScalar     *ca;
4803   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4804   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4805 
4806   PetscFunctionBegin;
4807   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4808   if (scall == MAT_INITIAL_MATRIX){
4809     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4810     ci[0] = 0;
4811     for (i=0; i<am; i++){
4812       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4813     }
4814     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4815     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4816     k = 0;
4817     for (i=0; i<am; i++) {
4818       ncols_o = bi[i+1] - bi[i];
4819       ncols_d = ai[i+1] - ai[i];
4820       /* off-diagonal portion of A */
4821       for (jo=0; jo<ncols_o; jo++) {
4822         col = cmap[*bj];
4823         if (col >= cstart) break;
4824         cj[k]   = col; bj++;
4825         ca[k++] = *ba++;
4826       }
4827       /* diagonal portion of A */
4828       for (j=0; j<ncols_d; j++) {
4829         cj[k]   = cstart + *aj++;
4830         ca[k++] = *aa++;
4831       }
4832       /* off-diagonal portion of A */
4833       for (j=jo; j<ncols_o; j++) {
4834         cj[k]   = cmap[*bj++];
4835         ca[k++] = *ba++;
4836       }
4837     }
4838     /* put together the new matrix */
4839     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4840     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4841     /* Since these are PETSc arrays, change flags to free them as necessary. */
4842     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4843     mat->free_a  = PETSC_TRUE;
4844     mat->free_ij = PETSC_TRUE;
4845     mat->nonew   = 0;
4846   } else if (scall == MAT_REUSE_MATRIX){
4847     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4848     ci = mat->i; cj = mat->j; cam = mat->a;
4849     for (i=0; i<am; i++) {
4850       /* off-diagonal portion of A */
4851       ncols_o = bi[i+1] - bi[i];
4852       for (jo=0; jo<ncols_o; jo++) {
4853         col = cmap[*bj];
4854         if (col >= cstart) break;
4855         *cam++ = *ba++; bj++;
4856       }
4857       /* diagonal portion of A */
4858       ncols_d = ai[i+1] - ai[i];
4859       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4860       /* off-diagonal portion of A */
4861       for (j=jo; j<ncols_o; j++) {
4862         *cam++ = *ba++; bj++;
4863       }
4864     }
4865   } else {
4866     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4867   }
4868 
4869   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4870   PetscFunctionReturn(0);
4871 }
4872 
4873 #undef __FUNCT__
4874 #define __FUNCT__ "MatGetLocalMatCondensed"
4875 /*@C
4876      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and NON-ZERO columns of A
4877 
4878     Not Collective
4879 
4880    Input Parameters:
4881 +    A - the matrix
4882 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4883 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4884 
4885    Output Parameter:
4886 .    A_loc - the local sequential matrix generated
4887 
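   A minimal usage sketch (A is a hypothetical assembled MPIAIJ matrix; passing
   PETSC_NULL for row and col lets the routine build the index sets itself;
   error checking omitted):
.vb
     Mat Aloc;
     MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);
.ve
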
4888     Level: developer
4889 
4890 @*/
4891 PetscErrorCode  MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4892 {
4893   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4894   PetscErrorCode    ierr;
4895   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4896   IS                isrowa,iscola;
4897   Mat               *aloc;
4898 
4899   PetscFunctionBegin;
4900   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4901   if (!row){
4902     start = A->rmap->rstart; end = A->rmap->rend;
4903     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4904   } else {
4905     isrowa = *row;
4906   }
4907   if (!col){
4908     start = A->cmap->rstart;
4909     cmap  = a->garray;
4910     nzA   = a->A->cmap->n;
4911     nzB   = a->B->cmap->n;
4912     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4913     ncols = 0;
4914     for (i=0; i<nzB; i++) {
4915       if (cmap[i] < start) idx[ncols++] = cmap[i];
4916       else break;
4917     }
4918     imark = i;
4919     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4920     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4921     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4922   } else {
4923     iscola = *col;
4924   }
4925   if (scall != MAT_INITIAL_MATRIX){
4926     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4927     aloc[0] = *A_loc;
4928   }
4929   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4930   *A_loc = aloc[0];
4931   ierr = PetscFree(aloc);CHKERRQ(ierr);
4932   if (!row){
4933     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4934   }
4935   if (!col){
4936     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4937   }
4938   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4939   PetscFunctionReturn(0);
4940 }
4941 
4942 #undef __FUNCT__
4943 #define __FUNCT__ "MatGetBrowsOfAcols"
4944 /*@C
4945     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
4946 
4947     Collective on Mat
4948 
4949    Input Parameters:
4950 +    A,B - the matrices in mpiaij format
4951 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4952 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4953 
4954    Output Parameter:
4955 +    rowb, colb - index sets of rows and columns of B to extract
4956 .    brstart - index of the first row of B_seq that comes from B's local rows (the next B->rmap->n rows of B_seq are B's local rows)
4957 -    B_seq - the sequential matrix generated
4958 
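   A minimal usage sketch for a first call (A and B are hypothetical compatible
   MPIAIJ matrices; error checking omitted):
.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      Bseq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);
.ve
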
4959     Level: developer
4960 
4961 @*/
4962 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4963 {
4964   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4965   PetscErrorCode    ierr;
4966   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4967   IS                isrowb,iscolb;
4968   Mat               *bseq;
4969 
4970   PetscFunctionBegin;
4971   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4972     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4973   }
4974   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4975 
4976   if (scall == MAT_INITIAL_MATRIX){
4977     start = A->cmap->rstart;
4978     cmap  = a->garray;
4979     nzA   = a->A->cmap->n;
4980     nzB   = a->B->cmap->n;
4981     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4982     ncols = 0;
4983     for (i=0; i<nzB; i++) {  /* row < local row index */
4984       if (cmap[i] < start) idx[ncols++] = cmap[i];
4985       else break;
4986     }
4987     imark = i;
4988     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4989     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4990     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4991     *brstart = imark;
4992     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4993   } else {
4994     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4995     isrowb = *rowb; iscolb = *colb;
4996     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4997     bseq[0] = *B_seq;
4998   }
4999   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5000   *B_seq = bseq[0];
5001   ierr = PetscFree(bseq);CHKERRQ(ierr);
5002   if (!rowb){
5003     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
5004   } else {
5005     *rowb = isrowb;
5006   }
5007   if (!colb){
5008     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
5009   } else {
5010     *colb = iscolb;
5011   }
5012   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5013   PetscFunctionReturn(0);
5014 }
5015 
5016 #undef __FUNCT__
5017 #define __FUNCT__ "MatGetBrowsOfAoCols"
5018 /*@C
5019     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to
5020     the nonzero columns of the OFF-DIAGONAL portion of the local A
5021 
5022     Collective on Mat
5023 
5024    Input Parameters:
5025 +    A,B - the matrices in mpiaij format
5026 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5027 .    startsj - starting points in B's sending j-array, saved for MAT_REUSE (or PETSC_NULL)
5028 .    startsj_r - starting points in B's receiving j-array, saved for MAT_REUSE (or PETSC_NULL)
5029 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5030 
5031    Output Parameter:
5032 .    B_oth - the sequential matrix generated
5033 
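   A minimal usage sketch for a first call (A and B are hypothetical compatible
   MPIAIJ matrices; the PETSC_NULL initializations are only illustrative; error
   checking omitted):
.vb
     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;
     MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);
.ve
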
5034     Level: developer
5035 
5036 @*/
5037 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5038 {
5039   VecScatter_MPI_General *gen_to,*gen_from;
5040   PetscErrorCode         ierr;
5041   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5042   Mat_SeqAIJ             *b_oth;
5043   VecScatter             ctx=a->Mvctx;
5044   MPI_Comm               comm=((PetscObject)ctx)->comm;
5045   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5046   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5047   PetscScalar            *rvalues,*svalues;
5048   MatScalar              *b_otha,*bufa,*bufA;
5049   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5050   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5051   MPI_Status             *sstatus,rstatus;
5052   PetscMPIInt            jj;
5053   PetscInt               *cols,sbs,rbs;
5054   PetscScalar            *vals;
5055 
5056   PetscFunctionBegin;
5057   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5058     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5059   }
5060   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5061   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5062 
5063   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5064   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5065   rvalues  = gen_from->values; /* holds the length of receiving row */
5066   svalues  = gen_to->values;   /* holds the length of sending row */
5067   nrecvs   = gen_from->n;
5068   nsends   = gen_to->n;
5069 
5070   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5071   srow     = gen_to->indices;   /* local row index to be sent */
5072   sstarts  = gen_to->starts;
5073   sprocs   = gen_to->procs;
5074   sstatus  = gen_to->sstatus;
5075   sbs      = gen_to->bs;
5076   rstarts  = gen_from->starts;
5077   rprocs   = gen_from->procs;
5078   rbs      = gen_from->bs;
5079 
5080   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5081   if (scall == MAT_INITIAL_MATRIX){
5082     /* i-array */
5083     /*---------*/
5084     /*  post receives */
5085     for (i=0; i<nrecvs; i++){
5086       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5087       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5088       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5089     }
5090 
5091     /* pack the outgoing message */
5092     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5093     sstartsj[0] = 0;  rstartsj[0] = 0;
5094     len = 0; /* total length of j or a array to be sent */
5095     k = 0;
5096     for (i=0; i<nsends; i++){
5097       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5098       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5099       for (j=0; j<nrows; j++) {
5100         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5101         for (l=0; l<sbs; l++){
5102           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5103           rowlen[j*sbs+l] = ncols;
5104           len += ncols;
5105           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5106         }
5107         k++;
5108       }
5109       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5110       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5111     }
5112     /* recvs and sends of i-array are completed */
5113     i = nrecvs;
5114     while (i--) {
5115       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5116     }
5117     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5118 
5119     /* allocate buffers for sending j and a arrays */
5120     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5121     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5122 
5123     /* create i-array of B_oth */
5124     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5125     b_othi[0] = 0;
5126     len = 0; /* total length of j or a array to be received */
5127     k = 0;
5128     for (i=0; i<nrecvs; i++){
5129       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5130       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5131       for (j=0; j<nrows; j++) {
5132         b_othi[k+1] = b_othi[k] + rowlen[j];
5133         len += rowlen[j]; k++;
5134       }
5135       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5136     }
5137 
5138     /* allocate space for j and a arrays of B_oth */
5139     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5140     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5141 
5142     /* j-array */
5143     /*---------*/
5144     /*  post receives of j-array */
5145     for (i=0; i<nrecvs; i++){
5146       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5147       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5148     }
5149 
5150     /* pack the outgoing message j-array */
5151     k = 0;
5152     for (i=0; i<nsends; i++){
5153       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5154       bufJ = bufj+sstartsj[i];
5155       for (j=0; j<nrows; j++) {
5156         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5157         for (ll=0; ll<sbs; ll++){
5158           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5159           for (l=0; l<ncols; l++){
5160             *bufJ++ = cols[l];
5161           }
5162           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5163         }
5164       }
5165       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5166     }
5167 
5168     /* recvs and sends of j-array are completed */
5169     i = nrecvs;
5170     while (i--) {
5171       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5172     }
5173     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5174   } else if (scall == MAT_REUSE_MATRIX){
5175     sstartsj = *startsj;
5176     rstartsj = *startsj_r;
5177     bufa     = *bufa_ptr;
5178     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5179     b_otha   = b_oth->a;
5180   } else {
5181     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5182   }
5183 
5184   /* a-array */
5185   /*---------*/
5186   /*  post receives of a-array */
5187   for (i=0; i<nrecvs; i++){
5188     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5189     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5190   }
5191 
5192   /* pack the outgoing message a-array */
5193   k = 0;
5194   for (i=0; i<nsends; i++){
5195     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5196     bufA = bufa+sstartsj[i];
5197     for (j=0; j<nrows; j++) {
5198       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5199       for (ll=0; ll<sbs; ll++){
5200         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5201         for (l=0; l<ncols; l++){
5202           *bufA++ = vals[l];
5203         }
5204         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5205       }
5206     }
5207     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5208   }
5209   /* recvs and sends of a-array are completed */
5210   i = nrecvs;
5211   while (i--) {
5212     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5213   }
5214   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5215   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5216 
5217   if (scall == MAT_INITIAL_MATRIX){
5218     /* put together the new matrix */
5219     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5220 
5221     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5222     /* Since these are PETSc arrays, change flags to free them as necessary. */
5223     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5224     b_oth->free_a  = PETSC_TRUE;
5225     b_oth->free_ij = PETSC_TRUE;
5226     b_oth->nonew   = 0;
5227 
5228     ierr = PetscFree(bufj);CHKERRQ(ierr);
5229     if (!startsj || !bufa_ptr){
5230       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5231       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5232     } else {
5233       *startsj   = sstartsj;
5234       *startsj_r = rstartsj;
5235       *bufa_ptr  = bufa;
5236     }
5237   }
5238   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5239   PetscFunctionReturn(0);
5240 }
5241 
5242 #undef __FUNCT__
5243 #define __FUNCT__ "MatGetCommunicationStructs"
5244 /*@C
5245   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5246 
5247   Not Collective
5248 
5249   Input Parameters:
5250 . A - The matrix in mpiaij format
5251 
5252   Output Parameter:
5253 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5254 . colmap - A map from global column index to local index into lvec
5255 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5256 
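  A minimal usage sketch (A is a hypothetical matrix of type MPIAIJ; error
  checking omitted):
.vb
    Vec        lvec;
    VecScatter scat;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    MatGetCommunicationStructs(A,&lvec,&colmap,&scat);
.ve
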
5257   Level: developer
5258 
5259 @*/
5260 #if defined (PETSC_USE_CTABLE)
5261 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5262 #else
5263 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5264 #endif
5265 {
5266   Mat_MPIAIJ *a;
5267 
5268   PetscFunctionBegin;
5269   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5270   PetscValidPointer(lvec, 2);
5271   PetscValidPointer(colmap, 3);
5272   PetscValidPointer(multScatter, 4);
5273   a = (Mat_MPIAIJ *) A->data;
5274   if (lvec) *lvec = a->lvec;
5275   if (colmap) *colmap = a->colmap;
5276   if (multScatter) *multScatter = a->Mvctx;
5277   PetscFunctionReturn(0);
5278 }
5279 
5280 EXTERN_C_BEGIN
5281 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5282 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5283 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5284 EXTERN_C_END
5285 
5286 #undef __FUNCT__
5287 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5288 /*
5289     Computes C = A*B as (B'*A')', since computing the product directly for these types is untenable
5290 
5291                n                       p                          p
5292         (              )       (              )         (                  )
5293       m (      A       )  *  n (       B      )   =   m (         C        )
5294         (              )       (              )         (                  )
5295 
5296 */
5297 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5298 {
5299   PetscErrorCode     ierr;
5300   Mat                At,Bt,Ct;
5301 
5302   PetscFunctionBegin;
5303   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5304   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5305   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5306   ierr = MatDestroy(At);CHKERRQ(ierr);
5307   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5308   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5309   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5310   PetscFunctionReturn(0);
5311 }
5312 
5313 #undef __FUNCT__
5314 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5315 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5316 {
5317   PetscErrorCode ierr;
5318   PetscInt       m=A->rmap->n,n=B->cmap->n;
5319   Mat            Cmat;
5320 
5321   PetscFunctionBegin;
5322   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5323   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5324   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5325   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5326   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5327   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5328   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5329   *C   = Cmat;
5330   PetscFunctionReturn(0);
5331 }

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
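/*
   A minimal usage sketch (hypothetical caller, not part of this file):
   MatMatMult() dispatches here when A is MPIDENSE and B is MPIAIJ. A first
   call with MAT_INITIAL_MATRIX creates C; later calls with the same nonzero
   pattern may reuse it:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     (change entries of A or B, keeping the same pattern)
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/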

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS)
extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SPOOLES)
extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
#endif
EXTERN_C_END

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/
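/*
   A minimal sketch of selecting this type from the options database
   (hypothetical caller; the sizes are illustrative only):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);    (honors -mat_type mpiaij)
*/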

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode  MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs     = 1;
  B->assembled    = PETSC_FALSE;

  B->insertmode   = NOT_SET_VALUES;
  b->size         = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off-processor entries */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix-vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_aij_mumps",
                                     MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                     "MatGetFactor_mpiaij_pastix",
                                     MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                     "MatConvert_MPIAIJ_MPIAIJPERM",
                                     MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                     "MatConvert_MPIAIJ_MPIAIJCRL",
                                     MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                     "MatConvert_MPIAIJ_MPISBAIJ",
                                     MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                     MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                     MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
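/*
   The functions composed above are retrieved by name at their call sites; a
   minimal sketch of the query side (hypothetical caller, not part of this
   file):

     PetscErrorCode (*f)(Mat,MatFactorType,Mat*);
     Mat            F;
     ierr = PetscObjectQueryFunction((PetscObject)B,"MatGetFactor_mumps_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) { ierr = (*f)(B,MAT_FACTOR_LU,&F);CHKERRQ(ierr); }
*/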

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.

       The i and j indices are 0 based

       See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       You cannot later use MatSetValues() to change values in this matrix.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
                                               PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
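/*
   A minimal usage sketch (illustrative values, not from this file): each of
   two processes owns one row holding a single entry in its diagonal block and
   nothing off-diagonal. The empty off-diagonal part still needs valid row
   pointers oi of length m+1; oj and oa are never read here since oi records
   zero entries.

     PetscInt    i[2]  = {0,1},  j[1]  = {0};
     PetscScalar a[1]  = {2.0};
     PetscInt    oi[2] = {0,0},  oj[1];
     PetscScalar oa[1];
     Mat         mat;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
*/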

/*
    Special version for direct calls from Fortran
*/
#include "private/fortranimpl.h"

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat             mat = *mmat;
  PetscInt        m = *mm, n = *mn;
  InsertMode      addv = *maddv;
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar     value;
  PetscErrorCode  ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool       roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat             A = aij->A;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar       *aa = a->a;
  PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat             B = aij->B;
  Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
  PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar       *ba = b->a;

  PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt        nonew = a->nonew;
  MatScalar       *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 ba must be refreshed from the new B before ap2 is computed from it */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END